testFW: GTEST
testCaseFolder:
- ./compute/test/cker
- - ./runtime/onert/core/src/backend/cpu_common
+ - ./runtime/onert/core/src/backend/basic
- ./runtime/onert/frontend/nnapi
- ./runtime/onert/test/core/compiler
- ./runtime/onert/test/core/exec
- functionName:
starts:
- TEST
-
+
negativeTestCase:
- condition:
- testName:
----
Language: Cpp
BasedOnStyle: Google
AccessModifierOffset: -2
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
- AfterClass: true
- AfterControlStatement: true
- AfterEnum: true
- AfterFunction: true
- AfterNamespace: true
- AfterObjCDeclaration: false
- AfterStruct: true
- AfterUnion: false
- BeforeCatch: true
- BeforeElse: true
- IndentBraces: false
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: true
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: false
+ AfterStruct: true
+ AfterUnion: false
+ AfterExternBlock: false
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
+ConstructorInitializerIndentWidth: 2
+ContinuationIndentWidth: 2
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
PointerAlignment: Right
ReflowComments: true
SortIncludes: false
+SortUsingDeclarations: false
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
-TabWidth: 4
+TabWidth: 2
UseTab: Never
+++ /dev/null
-Language: Cpp
-BasedOnStyle: Google
-AccessModifierOffset: -2
-AlignAfterOpenBracket: Align
-AlignEscapedNewlinesLeft: true
-AlignConsecutiveAssignments: false
-AlignConsecutiveDeclarations: false
-AlignOperands: true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: false
-AllowShortLoopsOnASingleLine: false
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: false
-AlwaysBreakTemplateDeclarations: false
-BinPackArguments: true
-BinPackParameters: true
-BraceWrapping:
- AfterClass: true
- AfterControlStatement: true
- AfterEnum: true
- AfterFunction: true
- AfterNamespace: true
- AfterObjCDeclaration: false
- AfterStruct: true
- AfterUnion: false
- AfterExternBlock: false
- BeforeCatch: true
- BeforeElse: true
- IndentBraces: false
-BreakBeforeBraces: Custom
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-BreakAfterJavaFieldAnnotations: false
-BreakStringLiterals: true
-ColumnLimit: 100
-CommentPragmas: '^ IWYU pragma:'
-ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 2
-ContinuationIndentWidth: 2
-Cpp11BracedListStyle: true
-DerivePointerAlignment: false
-DisableFormat: false
-ExperimentalAutoDetectBinPacking: false
-FixNamespaceComments: false
-IncludeCategories:
- - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
- Priority: 2
- - Regex: '^(<|"(gtest|isl|json)/)'
- Priority: 3
- - Regex: '.*'
- Priority: 1
-IndentCaseLabels: true
-IndentWidth: 2
-IndentWrappedFunctionNames: false
-JavaScriptQuotes: Leave
-JavaScriptWrapImports: true
-KeepEmptyLinesAtTheStartOfBlocks: true
-MacroBlockBegin: ''
-MacroBlockEnd: ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: true
-PenaltyBreakBeforeFirstCallParameter: 19
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 60
-PointerAlignment: Right
-ReflowComments: true
-SortIncludes: false
-SortUsingDeclarations: false
-SpaceAfterCStyleCast: false
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeParens: ControlStatements
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 1
-SpacesInAngles: false
-SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard: Cpp11
-TabWidth: 2
-UseTab: Never
mkdir -p $(OVERLAY_FOLDER)/lib
cp $(EXT_ACL_FOLDER)/* $(OVERLAY_FOLDER)/lib
# Make stamp file
- printf "20.05" > $(OVERLAY_FOLDER)/ARMCOMPUTE.stamp
+ printf "21.02" > $(OVERLAY_FOLDER)/ARMCOMPUTE.stamp
endif
NNFW_WORKSPACE="$(WORKSPACE)" NNFW_INSTALL_PREFIX=$(INSTALL_PATH) ./nnfw configure \
- Please post questions, issues, or suggestions into [Issues](https://github.com/Samsung/ONE/issues). This is the best way to communicate with the developer.
- You can also have an open discussion with community members through [gitter.im](https://gitter.im/Samsung/ONE) channel.
-
-## Hall of Fame
-
-[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/0)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/1)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/2)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/3)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/4)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/5)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/6)[](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/7)
-
- ./arser
- ./circle2circle
- ./circle-quantizer
+ - ./crew
- ./cwrap
- ./foder
- ./hermes
- ./logo-core
- ./luci
- ./luci-interpreter
+ - ./luci-eval-driver
+ - ./luci-pass-value-test
- ./luci-value-test
- ./mio-circle
- ./mio-tflite
- ./record-minmax
- ./safemain
- ./souschef
- - ./stdex
- ./tflite2circle
testFile:
{
public:
explicit Overlay(const Shape &shape, const Layout &layout, T *base)
- : View<T>{shape, layout}, _base{base}
+ : View<T>{shape, layout}, _base{base}
{
// DO NOTHING
}
{
public:
Shape(uint32_t depth, uint32_t height, uint32_t width)
- : _depth{depth}, _height{height}, _width{width}
+ : _depth{depth}, _height{height}, _width{width}
{
// DO NOTHING
}
{
public:
explicit Overlay(const Shape &shape, const Layout &layout, InputIt it)
- : _impl{shape, layout}, _it{it}
+ : _impl{shape, layout}, _it{it}
{
// DO NOTHING
}
{
public:
Shape(uint32_t count, uint32_t depth, uint32_t height, uint32_t width)
- : _count{count}, _depth{depth}, _height{height}, _width{width}
+ : _count{count}, _depth{depth}, _height{height}, _width{width}
{
// DO NOTHING
}
{
public:
explicit Overlay(const Shape &shape, const Layout &layout, T *base)
- : View<T>{shape, layout}, _base{base}
+ : View<T>{shape, layout}, _base{base}
{
// DO NOTHING
}
{
public:
explicit View(const Shape &shape, const Layout &layout)
- : _shape{shape}, _layout{std::move(layout)}
+ : _shape{shape}, _layout{std::move(layout)}
{
// DO NOTHING
}
const Shape shape{4, 6, 3};
int data[4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, CHWLayout>(shape, data);
const Shape shape{4, 6, 3};
int data[4 * 6 * 3] = {
- 0,
+ 0,
};
const auto overlay = make_overlay<int, CHWLayout>(shape, data);
const Shape shape{4, 6, 3};
int data[4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, CHWLayout>(shape, data);
const Shape shape{2, 4, 6, 3};
int data[2 * 4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, NCHWLayout>(shape, data);
const Shape shape{2, 4, 6, 3};
int data[2 * 4 * 6 * 3] = {
- 0,
+ 0,
};
const auto overlay = make_overlay<int, NCHWLayout>(shape, data);
const Shape shape{2, 4, 6, 3};
int data[2 * 4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, NCHWLayout>(shape, data);
const Shape shape{2, 3};
int data[2 * 3] = {
- 0,
+ 0,
};
auto view = make_overlay<int, LexicalLayout>(shape, data);
const Shape shape{2, 3};
int data[2 * 3] = {
- 0,
+ 0,
};
const auto view = make_overlay<int, LexicalLayout>(shape, data);
const Shape shape{2, 3};
int data[2 * 3] = {
- 0,
+ 0,
};
auto view = make_overlay<int, LexicalLayout>(shape, data);
# It means that a developer who wants to link arser just needs to add one line.
# target_link_libraries(another-users-target arser)
target_include_directories(arser INTERFACE include/)
+target_link_libraries(arser INTERFACE nncc_coverage)
if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
-set(TESTS "${CMAKE_CURRENT_SOURCE_DIR}/tests/arser.test.cpp")
+set(TESTS "${CMAKE_CURRENT_SOURCE_DIR}/tests/arser.test.cpp"
+ "${CMAKE_CURRENT_SOURCE_DIR}/tests/HelpMessage.test.cpp")
GTest_AddTest(arser_test ${TESTS})
-target_include_directories(arser_test PRIVATE include)
+target_link_libraries(arser_test arser)
* limitations under the License.
*/
+#ifndef __ARSER_H__
+#define __ARSER_H__
+
#include <iostream>
#include <sstream>
#include <cstring>
-namespace
+#include <cassert>
+
+namespace arser
+{
+namespace internal
{
template <typename T> T lexical_cast(const std::string &str)
return data;
}
-template <> bool lexical_cast(const std::string &str)
+template <> inline bool lexical_cast(const std::string &str)
{
bool data = true;
if (str == "false" || str == "False" || str == "FALSE" || str == "0")
template <> inline std::string to_string(const bool value) { return value ? "true" : "false"; }
-} // namespace
+/**
+ * @brief Returns the string with leading dashes removed.
+ *
+ * If there is no leading dash, the string is returned as is.
+ */
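+// e.g. remove_dash("--input") returns "input"; an all-dash string such as "--" is returned as is.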
+inline std::string remove_dash(const std::string &str)
+{
+ std::string ret{str};
+ auto pos = ret.find_first_not_of('-');
+ if (pos == std::string::npos)
+ return ret;
+ return ret.substr(pos);
+}
+
+/**
+ * @brief Returns the string created by concatenating the elements of a vector, separated by commas.
+ */
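+// e.g. make_comma_concatenated({"-h", "--help"}) returns "-h, --help".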
+inline std::string make_comma_concatenated(const std::vector<std::string> &vec)
+{
+ std::ostringstream oss;
+ std::copy(vec.begin(), std::prev(vec.end()), std::ostream_iterator<std::string>(oss, ", "));
+ oss << vec.back();
+ return oss.str();
+}
+
+} // namespace internal
+} // namespace arser
namespace arser
{
class Arser;
+/**
+ * Argument
+ *   ├── positional argument
+ *   └── optional argument   [ dash at the beginning of the string ]
+ *       ├── long option     [ two or more dashes ]
+ *       └── short option    [ one dash ]
+ *
+ * Argument has two types - positional argument, optional argument.
+ *
+ * The way to distinguish the two types is whether there is a dash('-') at the beginning of the
+ * string.
+ *
+ * And, an optional argument has two types as well - long option and short option - which are
+ * distinguished by the number of dashes.
+ */
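+/*
+ * A minimal usage sketch (illustrative only; argument names below are examples):
+ *
+ *   arser::Arser arser;
+ *   arser.add_argument("input");            // positional argument (no leading dash)
+ *   arser.add_argument("-v", "--verbose");  // optional argument with short and long option
+ *   arser.parse(argc, argv);
+ */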
class Argument
{
public:
- explicit Argument(const std::string &arg_name) : _name{arg_name} {}
+ explicit Argument(const std::string &arg_name) : _long_name{arg_name}, _names{arg_name} {}
+ explicit Argument(const std::string &short_name, const std::string &long_name)
+ : _short_name{short_name}, _long_name{long_name}, _names{short_name, long_name}
+ {
+ }
+ explicit Argument(const std::string &short_name, const std::string &long_name,
+ const std::vector<std::string> &names)
+ : _short_name{short_name}, _long_name{long_name}, _names{names}
+ {
+ // 'names' must have 'short_name' and 'long_name'.
+ auto it = std::find(names.begin(), names.end(), short_name);
+ assert(it != names.end());
+ it = std::find(names.begin(), names.end(), long_name);
+ assert(it != names.end());
+    // to avoid an unused-variable warning.
+ (void)it;
+ }
Argument &nargs(uint32_t num)
{
{
if ((_nargs <= 1 && TypeName<T>::Get() == _type) ||
(_nargs > 1 && TypeName<std::vector<T>>::Get() == _type))
- _values.emplace_back(::to_string(value));
+ _values.emplace_back(internal::to_string(value));
else
{
throw std::runtime_error("Type mismatch. "
if ((_nargs <= 1 && TypeName<T>::Get() == _type) ||
(_nargs > 1 && TypeName<std::vector<T>>::Get() == _type))
{
- _values.emplace_back(::to_string(value));
+ _values.emplace_back(internal::to_string(value));
default_value(values...);
}
else
}
private:
- std::string _name;
+ // The '_names' vector contains all of the options specified by the user.
+ // And among them, '_long_name' and '_short_name' are selected.
+ std::string _long_name;
+ std::string _short_name;
+ std::vector<std::string> _names;
std::string _type;
std::string _help_message;
std::function<void(void)> _func;
{
public:
explicit Arser(const std::string &program_description = {})
- : _program_description{program_description}
+ : _program_description{program_description}
{
- add_argument("--help").help("Show help message and exit").nargs(0);
+ add_argument("-h", "--help").help("Show help message and exit").nargs(0);
}
Argument &add_argument(const std::string &arg_name)
{
- if (arg_name.at(0) != '-')
+ if (arg_name.at(0) != '-') /* positional */
{
_positional_arg_vec.emplace_back(arg_name);
_arg_map[arg_name] = &_positional_arg_vec.back();
}
- else
+ else /* optional */
{
+      // The length of an optional argument name must be 2 or more,
+      // and it must contain at least one non-dash character (e.g. '-' or '--' alone are invalid).
+ if (arg_name.size() < 2)
+ {
+ throw std::runtime_error("Too short name. The length of argument name must be 2 or more.");
+ }
+ if (arg_name == "--")
+ {
+ throw std::runtime_error(
+ "Too short name. Option name must contain at least one character other than dash.");
+ }
_optional_arg_vec.emplace_back(arg_name);
+ _optional_arg_vec.back()._short_name = arg_name;
_arg_map[arg_name] = &_optional_arg_vec.back();
}
return *_arg_map[arg_name];
}
+ Argument &add_argument(const std::vector<std::string> &arg_name_vec)
+ {
+ assert(arg_name_vec.size() >= 2);
+ std::string long_opt, short_opt;
+ // find long and short option
+ for (const auto &arg_name : arg_name_vec)
+ {
+ if (arg_name.at(0) != '-')
+ {
+ throw std::runtime_error("Invalid argument. "
+ "Positional argument cannot have short option.");
+ }
+ assert(arg_name.size() >= 2);
+ if (long_opt.empty() && arg_name.at(0) == '-' && arg_name.at(1) == '-')
+ {
+ long_opt = arg_name;
+ }
+ if (short_opt.empty() && arg_name.at(0) == '-' && arg_name.at(1) != '-')
+ {
+ short_opt = arg_name;
+ }
+ }
+ // If one of the two is empty, fill it with the non-empty one for pretty printing.
+ if (long_opt.empty())
+ {
+ assert(not short_opt.empty());
+ long_opt = short_opt;
+ }
+ if (short_opt.empty())
+ {
+ assert(not long_opt.empty());
+ short_opt = long_opt;
+ }
+
+ _optional_arg_vec.emplace_back(short_opt, long_opt, arg_name_vec);
+ for (const auto &arg_name : arg_name_vec)
+ {
+ _arg_map[arg_name] = &_optional_arg_vec.back();
+ }
+ return _optional_arg_vec.back();
+ }
+
+ template <typename... Ts> Argument &add_argument(const std::string &arg_name, Ts... arg_names)
+ {
+ if (sizeof...(arg_names) == 0)
+ {
+ return add_argument(arg_name);
+ }
+ // sizeof...(arg_names) > 0
+ else
+ {
+ return add_argument(std::vector<std::string>{arg_name, arg_names...});
+ }
+ }
+
+ void validate_arguments(void)
+ {
+    // Positional arguments are implicitly required; calling required() on them is not allowed.
+ for (const auto &arg : _positional_arg_vec)
+ {
+ if (arg._is_required)
+ {
+ throw std::runtime_error("Invalid arguments. Positional argument must always be required.");
+ }
+ }
+ }
+
void parse(int argc, char **argv)
{
+ validate_arguments();
_program_name = argv[0];
_program_name.erase(0, _program_name.find_last_of("/\\") + 1);
if (argc >= 2)
{
- if (!std::strcmp(argv[1], "--help"))
+ if (!std::strcmp(argv[1], "--help") || !std::strcmp(argv[1], "-h"))
{
std::cout << *this;
std::exit(0);
for (const auto &arg : _arg_map)
{
const auto &func = arg.second->_func;
- if (func && !std::strcmp(argv[1], arg.second->_name.c_str()))
+ if (func && !std::strcmp(argv[1], arg.first.c_str()))
{
func();
std::exit(0);
template <typename T> T get(const std::string &arg_name);
+ friend std::ostream &operator<<(std::ostream &stream, const Arser &parser)
+ {
+ // print description
+ if (!parser._program_description.empty())
+ {
+ stream << "What " << parser._program_name << " does: " << parser._program_description
+ << "\n\n";
+ }
+ /*
+ ** print usage
+ */
+ stream << "Usage: ./" << parser._program_name << " ";
+ // required optional argument
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ if (!arg._is_required)
+ continue;
+ stream << arg._short_name << " ";
+ std::string arg_name = arser::internal::remove_dash(arg._long_name);
+ std::for_each(arg_name.begin(), arg_name.end(),
+ [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ stream << " ";
+ }
+ // rest of the optional argument
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ if (arg._is_required)
+ continue;
+ stream << "[" << arg._short_name;
+ if (arg._nargs)
+ {
+ stream << " ";
+ std::string arg_name = arser::internal::remove_dash(arg._long_name);
+ std::for_each(arg_name.begin(), arg_name.end(),
+ [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ }
+ stream << "]"
+ << " ";
+ }
+    // positional arguments
+ for (const auto &arg : parser._positional_arg_vec)
+ {
+ stream << arg._long_name << " ";
+ }
+ stream << "\n\n";
+ /*
+ ** print argument list and its help message
+ */
+ // get the length of the longest argument
+ size_t length_of_longest_arg = 0;
+ for (const auto &arg : parser._positional_arg_vec)
+ {
+ length_of_longest_arg = std::max(length_of_longest_arg,
+ arser::internal::make_comma_concatenated(arg._names).size());
+ }
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ length_of_longest_arg = std::max(length_of_longest_arg,
+ arser::internal::make_comma_concatenated(arg._names).size());
+ }
+
+ const size_t message_width = 60;
+ // positional argument
+ if (!parser._positional_arg_vec.empty())
+ {
+ stream << "[Positional argument]" << std::endl;
+ for (const auto &arg : parser._positional_arg_vec)
+ {
+ stream.width(length_of_longest_arg);
+ stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
+ for (size_t i = 0; i < arg._help_message.length(); i += message_width)
+ {
+ if (i)
+ stream << std::string(length_of_longest_arg, ' ') << "\t";
+ stream << arg._help_message.substr(i, message_width) << std::endl;
+ }
+ }
+ std::cout << std::endl;
+ }
+ // optional argument
+ if (!parser._optional_arg_vec.empty())
+ {
+ stream << "[Optional argument]" << std::endl;
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ stream.width(length_of_longest_arg);
+ stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
+ for (size_t i = 0; i < arg._help_message.length(); i += message_width)
+ {
+ if (i)
+ stream << std::string(length_of_longest_arg, ' ') << "\t";
+ stream << arg._help_message.substr(i, message_width) << std::endl;
+ }
+ }
+ }
+
+ return stream;
+ }
+
private:
std::string _program_name;
std::string _program_description;
std::list<Argument> _positional_arg_vec;
std::list<Argument> _optional_arg_vec;
std::map<std::string, Argument *> _arg_map;
-
- friend std::ostream &operator<<(std::ostream &, const Arser &);
};
template <typename T> T Arser::get_impl(const std::string &arg_name, T *)
auto arg = _arg_map.find(arg_name);
if (arg == _arg_map.end())
throw std::runtime_error("Invalid argument. "
- "There is no argument you are looking for.");
+ "There is no argument you are looking for: " +
+ arg_name);
if (arg->second->_type != TypeName<T>::Get())
throw std::runtime_error("Type mismatch. "
"You must make sure that the argument is given before accessing it. "
"You can do it by calling arser[\"argument\"].");
- return ::lexical_cast<T>(arg->second->_values[0]);
+ return internal::lexical_cast<T>(arg->second->_values[0]);
}
template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name, std::vector<T> *)
auto arg = _arg_map.find(arg_name);
if (arg == _arg_map.end())
throw std::runtime_error("Invalid argument. "
- "There is no argument you are looking for.");
+ "There is no argument you are looking for: " +
+ arg_name);
if (arg->second->_type != TypeName<std::vector<T>>::Get())
throw std::runtime_error("Type mismatch. "
std::vector<T> data;
std::transform(arg->second->_values.begin(), arg->second->_values.end(), std::back_inserter(data),
- [](std::string str) -> T { return ::lexical_cast<T>(str); });
+ [](std::string str) -> T { return internal::lexical_cast<T>(str); });
return data;
}
return get_impl(arg_name, static_cast<T *>(nullptr));
}
-std::ostream &operator<<(std::ostream &stream, const Arser &parser)
-{
- // print description
- if (!parser._program_description.empty())
- {
- stream << "What " << parser._program_name << " does: " << parser._program_description << "\n\n";
- }
- /*
- ** print usage
- */
- stream << "Usage: ./" << parser._program_name << " ";
- // required optional argument
- for (const auto &arg : parser._optional_arg_vec)
- {
- if (!arg._is_required)
- continue;
- stream << arg._name << " ";
- std::string arg_name = arg._name.substr(2);
- std::for_each(arg_name.begin(), arg_name.end(),
- [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
- stream << " ";
- }
- // rest of the optional argument
- for (const auto &arg : parser._optional_arg_vec)
- {
- if (arg._is_required)
- continue;
- stream << "[" << arg._name;
- if (arg._nargs)
- {
- stream << " ";
- std::string arg_name = arg._name.substr(2);
- std::for_each(arg_name.begin(), arg_name.end(),
- [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
- }
- stream << "]"
- << " ";
- }
- // positional arguement
- for (const auto &arg : parser._positional_arg_vec)
- {
- stream << arg._name << " ";
- }
- stream << "\n\n";
- /*
- ** print argument list and its help message
- */
- // get the length of the longest argument
- size_t length_of_longest_arg = 0;
- for (const auto &arg : parser._positional_arg_vec)
- {
- length_of_longest_arg = std::max(length_of_longest_arg, arg._name.length());
- }
- for (const auto &arg : parser._optional_arg_vec)
- {
- length_of_longest_arg = std::max(length_of_longest_arg, arg._name.length());
- }
-
- const size_t message_width = 60;
- // positional argument
- if (!parser._positional_arg_vec.empty())
- {
- stream << "[Positional argument]" << std::endl;
- for (const auto &arg : parser._positional_arg_vec)
- {
- stream.width(length_of_longest_arg);
- stream << std::left << arg._name << "\t";
- for (size_t i = 0; i < arg._help_message.length(); i += message_width)
- {
- if (i)
- stream << std::string(length_of_longest_arg, ' ') << "\t";
- stream << arg._help_message.substr(i, message_width) << std::endl;
- }
- }
- std::cout << std::endl;
- }
- // optional argument
- if (!parser._optional_arg_vec.empty())
- {
- stream << "[Optional argument]" << std::endl;
- for (const auto &arg : parser._optional_arg_vec)
- {
- stream.width(length_of_longest_arg);
- stream << std::left << arg._name << "\t";
- for (size_t i = 0; i < arg._help_message.length(); i += message_width)
- {
- if (i)
- stream << std::string(length_of_longest_arg, ' ') << "\t";
- stream << arg._help_message.substr(i, message_width) << std::endl;
- }
- }
- }
-
- return stream;
-}
-
} // namespace arser
+
+#endif // __ARSER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "arser/arser.h"
+
+#include "Prompt.h"
+
+using namespace arser;
+
+/**
+ * [WARNING] DO NOT GIVE ARSER THE '-h' OR '--help' OPTION IN THE TESTS BELOW.
+ *
+ * arser exits with code 0 when the '-h' option is given, which makes googletest report a pass.
+ */
+
+TEST(HelpMessageTest, Default)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--dummy").nargs(0).help("Dummy optional argument");
+
+ std::ostringstream oss;
+ std::string expected_out = "Usage: ./arser [-h] [--dummy] \n"
+ "\n"
+ "[Optional argument]\n"
+ "-h, --help Show help message and exit\n"
+ "--dummy \tDummy optional argument\n";
+
+ test::Prompt prompt("./arser --dummy");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ oss << arser;
+
+ /* assert */
+ EXPECT_EQ(expected_out, oss.str());
+}
+
+TEST(HelpMessageTest, ShortOption)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("-v", "--verbose").nargs(0).help("Provides additional details");
+
+ std::ostringstream oss;
+ std::string expected_out = "Usage: ./arser [-h] [-v] \n"
+ "\n"
+ "[Optional argument]\n"
+ "-h, --help \tShow help message and exit\n"
+ "-v, --verbose\tProvides additional details\n";
+
+ test::Prompt prompt("./arser -v");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ oss << arser;
+
+ /* assert */
+ EXPECT_EQ(expected_out, oss.str());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ARSER_PROMPT_H__
+#define __ARSER_PROMPT_H__
+
+#include <iterator>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace arser
+{
+namespace test
+{
+
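+// Prompt splits a command-line string into argc/argv so tests can feed it to Arser::parse(),
+// e.g. test::Prompt prompt("./driver --verbose"); arser.parse(prompt.argc(), prompt.argv());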
+class Prompt
+{
+public:
+ Prompt(const std::string &command)
+ {
+ std::istringstream iss(command);
+ std::vector<std::string> token(std::istream_iterator<std::string>{iss},
+ std::istream_iterator<std::string>());
+ _arg = std::move(token);
+ _argv.reserve(_arg.size());
+ for (const auto &t : _arg)
+ {
+ _argv.push_back(const_cast<char *>(t.data()));
+ }
+ }
+ int argc(void) const { return _argv.size(); }
+ char **argv(void) { return _argv.data(); }
+
+private:
+ std::vector<char *> _argv;
+ std::vector<std::string> _arg;
+};
+
+} // namespace test
+} // namespace arser
+
+#endif // __ARSER_PROMPT_H__
Arser arser;
arser.add_argument("--verbose")
- .nargs(0)
- .help("It provides additional details as to what the executable is doing");
+ .nargs(0)
+ .help("It provides additional details as to what the executable is doing");
Prompt prompt("./executable --verbose");
/* act */
Arser arser;
arser.add_argument("--volume")
- .nargs(1)
- .type(arser::DataType::INT32)
- .help("Set a volume as you provided.");
+ .nargs(1)
+ .type(arser::DataType::INT32)
+ .help("Set a volume as you provided.");
arser.add_argument("--frequency")
- .nargs(1)
- .type(arser::DataType::FLOAT)
- .help("Set a frequency as you provided.");
+ .nargs(1)
+ .type(arser::DataType::FLOAT)
+ .help("Set a frequency as you provided.");
Prompt prompt("./radio --volume 5 --frequency 128.5");
/* act */
Arser arser;
arser.add_argument("--weight")
- .nargs(1)
- .type(arser::DataType::INT32)
- .help("Set a volume as you provided.");
+ .nargs(1)
+ .type(arser::DataType::INT32)
+ .help("Set a volume as you provided.");
Prompt prompt("./radio"); // empty argument
/* act */
Arser arser;
arser.add_argument("--volume")
- .nargs(1)
- .type(arser::DataType::INT32)
- .required()
- .help("Set a volume as you provided.");
+ .nargs(1)
+ .type(arser::DataType::INT32)
+ .required()
+ .help("Set a volume as you provided.");
Prompt prompt("./radio");
/* act */ /* assert */
Arser arser;
arser.add_argument("--input_path")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("input path of this program.")
- .required();
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
arser.add_argument("--output_path")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("output path of this program.")
- .required(true);
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
arser.add_argument("--training_data")
- .nargs(5)
- .type(arser::DataType::INT32_VEC)
- .help("give traning data to this program.")
- .required();
+ .nargs(5)
+ .type(arser::DataType::INT32_VEC)
+    .help("give training data to this program.")
+ .required();
Prompt prompt("./ml --input_path /I/am/in.put --output_path I/am/out.put "
"--training_data 2 43 234 3 334");
Arser arser;
arser.add_argument("--add_float")
- .nargs(2)
- .type(arser::DataType::FLOAT_VEC)
- .help("Add two float numbers.");
+ .nargs(2)
+ .type(arser::DataType::FLOAT_VEC)
+ .help("Add two float numbers.");
Prompt prompt("./calculator --add_float 3.2 5.4");
/* act */
Arser arser;
arser.add_argument("--three_color")
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .help("insert your three favorite color");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .help("insert your three favorite color");
Prompt prompt("./color_factory --three_color red blue yellow");
/* act */
Arser arser;
arser.add_argument("--version")
- .help("Show version and exit")
- .exit_with(std::bind(printVersion, "1.2.0"));
+ .help("Show version and exit")
+ .exit_with(std::bind(printVersion, "1.2.0"));
Prompt prompt("./arser --version");
/* act */ /* assert */
Arser arser;
arser.add_argument("--delivery")
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .default_value("pizza", "chicken", "hamburger")
- .help("Enter three foods that you want to deliver");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .default_value("pizza", "chicken", "hamburger")
+ .help("Enter three foods that you want to deliver");
arser.add_argument("--assistant")
- .type(arser::DataType::STR)
- .default_value("Bixby")
- .help("Enter name of your assistant");
+ .type(arser::DataType::STR)
+ .default_value("Bixby")
+ .help("Enter name of your assistant");
arser.add_argument("--sound")
- .type(arser::DataType::BOOL)
- .nargs(1)
- .default_value(true)
- .help("Sound on/off");
+ .type(arser::DataType::BOOL)
+ .nargs(1)
+ .default_value(true)
+ .help("Sound on/off");
arser.add_argument("--number")
- .type(arser::DataType::INT32_VEC)
- .nargs(4)
- .default_value(1, 2, 3, 4)
- .help("Enter the number that you want to call");
+ .type(arser::DataType::INT32_VEC)
+ .nargs(4)
+ .default_value(1, 2, 3, 4)
+ .help("Enter the number that you want to call");
arser.add_argument("--time")
- .type(arser::DataType::INT32_VEC)
- .nargs(3)
- .default_value(0, 0, 0)
- .help("Current time(H/M/S)");
+ .type(arser::DataType::INT32_VEC)
+ .nargs(3)
+ .default_value(0, 0, 0)
+ .help("Current time(H/M/S)");
arser.add_argument("--name")
- .type(arser::DataType::STR)
- .nargs(1)
- .default_value("no name")
- .help("Enter your name");
+ .type(arser::DataType::STR)
+ .nargs(1)
+ .default_value("no name")
+ .help("Enter your name");
Prompt prompt("/phone --time 1 52 34 --name arser");
/* act */
// 1 string, 1 argument
EXPECT_EQ("arser", arser.get<std::string>("--name"));
}
+
+TEST(BasicTest, shortOption)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ Prompt prompt("./driver -i /I/am/in.put --output_path I/am/out.put");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--input_path"]);
+ EXPECT_EQ("/I/am/in.put", arser.get<std::string>("--input_path"));
+ EXPECT_TRUE(arser["--output_path"]);
+ EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
+}
+
+TEST(BasicTest, shortMultipleOption)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i", "--input", "--in")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--input"]);
+ EXPECT_EQ("/I/am/in.put", arser.get<std::string>("--input"));
+ EXPECT_TRUE(arser["--output_path"]);
+ EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
+}
+
+TEST(BasicTest, OptWithRequiredDuplicate)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i", "--input", "--in")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
+ /* act */ /* assert */
+ EXPECT_THROW(arser.parse(prompt.argc(), prompt.argv()), std::runtime_error);
+}
+
+TEST(BasicTest, OptWithNonRequiredDuplicate)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i", "--input", "--in")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.");
+ /* .required() */
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--input"]);
+ EXPECT_EQ("/I/am/duplicate", arser.get<std::string>("--input"));
+ EXPECT_TRUE(arser["--output_path"]);
+ EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
+}
public:
template <typename T>
auto operator()(const std::pair<T, T> &p) const
- -> decltype(std::make_pair(std::declval<Callable>()(p.first),
- std::declval<Callable>()(p.second)))
+ -> decltype(std::make_pair(std::declval<Callable>()(p.first),
+ std::declval<Callable>()(p.second)))
{
return std::make_pair(f(p.first), f(p.second));
}
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(caffegen ${SOURCES})
-target_link_libraries(caffegen stdex)
target_link_libraries(caffegen cli)
# NOTE "Caffe" package provides both caffe and caffeproto target
# NOTE "caffeproto" is linked to "caffe"
#include "MergeCommand.h"
#include <cli/App.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <map>
#include <string>
-using stdex::make_unique;
+using std::make_unique;
int main(int argc, char **argv)
{
int entry(int argc, char **argv)
{
arser::Arser arser{
- "circle-inspect allows users to retrieve various information from a Circle model file"};
+ "circle-inspect allows users to retrieve various information from a Circle model file"};
arser.add_argument("--operators").nargs(0).help("Dump operators in circle file");
arser.add_argument("--conv2d_weight")
- .nargs(0)
- .help("Dump Conv2D series weight operators in circle file");
+ .nargs(0)
+ .help("Dump Conv2D series weight operators in circle file");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file");
arser.add_argument("circle").type(arser::DataType::STR).help("Circle file to inspect");
--- /dev/null
+set(SRCS_PART_TESTER
+ src/Driver.cpp
+ src/PModelsRunner.cpp
+ )
+
+add_executable(circle_part_driver ${SRCS_PART_TESTER})
+target_link_libraries(circle_part_driver foder)
+target_link_libraries(circle_part_driver loco)
+target_link_libraries(circle_part_driver luci_import)
+target_link_libraries(circle_part_driver luci_lang)
+target_link_libraries(circle_part_driver luci_log)
+target_link_libraries(circle_part_driver luci_interpreter)
+target_link_libraries(circle_part_driver crew)
+target_link_libraries(circle_part_driver safemain)
+target_link_libraries(circle_part_driver nncc_common)
+
+install(TARGETS circle_part_driver DESTINATION bin)
--- /dev/null
+# circle-part-driver
+
+_circle-part-driver_ is a test driver to run partitioned circle models
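+
+Invocation follows the usage printed by the driver itself:
+
+```
+circle_part_driver <path/to/partition/config> <num_inputs> <path/to/input/prefix> <path/to/output/file>
+```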
--- /dev/null
+require("foder")
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("crew")
+require("safemain")
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PModelsRunner.h"
+
+#include <luci/Log.h>
+
+#include <iostream>
+
+int entry(int argc, char **argv)
+{
+ LOGGER(l);
+
+ if (argc != 5)
+ {
+ std::cerr
+ << "Usage: " << argv[0]
+ << " <path/to/partition/config> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
+ return EXIT_FAILURE;
+ }
+  // NOTE: input/output data file names follow the format filename.ext0, filename.ext1, ...
+  // NOTE: output shapes are stored as filename.ext0.shape, filename.ext1.shape, ...
+  //       each holding one line of text in CSV format (like H,W or N,C,H,W)
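+  //       e.g. with input_prefix "in", inputs are read from in0, in1, ...; with output_file "out",
+  //       outputs are written to out0, out1, ... along with out0.shape, out1.shape, ...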
+
+ const char *config_filename = argv[1];
+ const int32_t num_inputs = atoi(argv[2]);
+ const char *input_prefix = argv[3];
+ const char *output_file = argv[4];
+
+ prunner::PModelsRunner pmrunner;
+
+ INFO(l) << "Read config file: " << config_filename << std::endl;
+ if (not pmrunner.load_config(config_filename))
+ return EXIT_FAILURE;
+
+ INFO(l) << "Read input file: " << input_prefix << ", #inputs: " << num_inputs << std::endl;
+ pmrunner.load_inputs(input_prefix, num_inputs);
+
+ INFO(l) << "Run all partitioned models..." << std::endl;
+ if (!pmrunner.run())
+ return EXIT_FAILURE;
+
+ INFO(l) << "Save output file: " << output_file << std::endl;
+ pmrunner.save_outputs(output_file);
+
+ return EXIT_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PModelsRunner.h"
+
+#include <luci/IR/Nodes/CircleInput.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+#include <luci/Importer.h>
+#include <luci/Log.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <foder/FileLoader.h>
+#include <crew/PConfig.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <string>
+#include <stdexcept>
+
+namespace
+{
+
+void write_file(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+std::unique_ptr<luci::Module> import_circle(const std::string &filename)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ {
+ throw std::runtime_error("Cannot open model file \"" + filename + "\".\n");
+ }
+ std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
+ std::istreambuf_iterator<char>());
+
+ return luci::Importer().importModule(circle::GetModel(model_data.data()));
+}
+
+void save_shape(const std::string &shape_filename, const luci::CircleOutput *output_node)
+{
+ if (output_node->rank() == 0)
+ {
+ write_file(shape_filename, "1", 1);
+ }
+ else
+ {
+ auto shape_str = std::to_string(output_node->dim(0).value());
+ for (uint32_t j = 1; j < output_node->rank(); j++)
+ {
+ shape_str += ",";
+ shape_str += std::to_string(output_node->dim(j).value());
+ }
+ write_file(shape_filename, shape_str.c_str(), shape_str.size());
+ }
+}
+
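+// Computes the tensor byte size from dtype and shape; e.g. a float32 tensor of shape [1,2,3]
+// occupies 4 * 1 * 2 * 3 = 24 bytes (assuming loco::size() returns the dtype size in bytes).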
+template <typename NodeT> size_t tensor_size(const NodeT *node)
+{
+ uint32_t tsize = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ assert(node->dim(i).known());
+ tsize *= node->dim(i).value();
+ }
+ return tsize;
+}
+
+} // namespace
+
+namespace prunner
+{
+
+bool PModelsRunner::load_config(const std::string &filename)
+{
+ if (!crew::read_ini(filename, _pconfig))
+ {
+ std::cerr << "ERROR: Invalid config ini file: '" << filename << "'" << std::endl;
+ return false;
+ }
+
+ for (auto &part : _pconfig.parts)
+ {
+ _models_to_run.push_back(part.model_file);
+ }
+ return true;
+}
+
+void PModelsRunner::load_inputs(const std::string &input_prefix, int32_t num_inputs)
+{
+ LOGGER(l);
+
+ auto its = _pconfig.source.inputs.begin();
+ for (int32_t i = 0; i < num_inputs; ++i, ++its)
+ {
+ std::string filename = input_prefix + std::to_string(i);
+
+ INFO(l) << "Load input data: " << filename << std::endl;
+ foder::FileLoader file_loader{filename};
+
+ std::string input_name = *its;
+ _data_stage[input_name] = file_loader.load();
+
+ INFO(l) << "Input: [" << input_name << "], size " << _data_stage[input_name].size()
+ << std::endl;
+ }
+}
+
+/**
+ * @brief Return true if all inputs of the model are ready in _data_stage
+ */
+bool PModelsRunner::is_input_ready(const RunModel &model)
+{
+ for (auto &part : _pconfig.parts)
+ {
+ if (part.model_file != model)
+ continue;
+
+ for (auto &input : part.inputs)
+ {
+ auto it = _data_stage.find(input);
+ if (it == _data_stage.end())
+ return false;
+ }
+ }
+ return true;
+}
+
+bool PModelsRunner::run(void)
+{
+ LOGGER(l);
+
+ // for each partitioned model, if the inputs of the model are ready, run the model
+ do
+ {
+ bool found_model = false;
+
+ for (auto it = _models_to_run.begin(); it != _models_to_run.end(); ++it)
+ {
+ auto model_fname = *it;
+
+ INFO(l) << "Check model input ready: " << model_fname << std::endl;
+ if (is_input_ready(model_fname))
+ {
+ found_model = true;
+
+ INFO(l) << "Run model: " << model_fname << std::endl;
+ auto module = import_circle(model_fname);
+
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // Set input
+ // TODO support multiple subgraphs
+ assert(module->size() == 1);
+ const auto input_nodes = loco::input_nodes(module->graph());
+ int32_t num_inputs = static_cast<int32_t>(input_nodes.size());
+ for (int32_t i = 0; i < num_inputs; i++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+
+ auto input_name = input_node->name();
+ assert(_data_stage.find(input_name) != _data_stage.end());
+
+ auto input_data = _data_stage[input_name];
+
+ interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ // Run interpreter
+ interpreter.interpret();
+ INFO(l) << "Run model: " << model_fname << " done" << std::endl;
+
+ // Get output.
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (uint32_t i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ auto output_name = output_node->name();
+
+ Buffer output_data(tensor_size(output_node));
+
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+          // There should not be duplicate output names
+ // TODO check with multiple virtual outputs
+ assert(_data_stage.find(output_name) == _data_stage.end());
+ _data_stage[output_name] = output_data;
+ }
+
+        // We've run this model; remove it from the model list
+ _models_to_run.erase(it);
+ break;
+ }
+ }
+
+ if (not found_model)
+ {
+ std::cerr << "ERROR: model partition or configuration has problems" << std::endl;
+ return false;
+ }
+ } while (not _models_to_run.empty());
+
+ return true;
+}
+
+void PModelsRunner::save_outputs(const std::string &output_file)
+{
+ // load source model as we need to get both shape and node name
+ // TODO check for unknown shape
+ auto source_fname = _pconfig.source.model_file;
+
+ auto module = import_circle(source_fname);
+
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (uint32_t i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+
+ auto output_name = output_node->name();
+ assert(_data_stage.find(output_name) != _data_stage.end());
+
+ auto tensor_data = _data_stage[output_name];
+ auto output_filename = output_file + std::to_string(i);
+
+ write_file(output_filename, tensor_data.data(), tensor_data.size());
+ save_shape(output_filename + ".shape", output_node);
+ }
+}
+
+} // namespace prunner
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_PRUNNER_PMODELS_RUNNER_H__
+#define __CIRCLE_PRUNNER_PMODELS_RUNNER_H__
+
+#include <crew/PConfig.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace prunner
+{
+
+using Buffer = std::vector<char>;
+
+using Buffers = std::map<std::string, Buffer>;
+
+using RunModel = std::string;
+
+using RunModels = std::vector<RunModel>;
+
+/**
+ * @brief PModelsRunner runs partitioned models from input data file and stores
+ * output data to a file
+ */
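+//
+// A typical call sequence, mirroring Driver.cpp (file names are illustrative):
+//   prunner::PModelsRunner runner;
+//   runner.load_config("model.conn.ini");
+//   runner.load_inputs("model.circle.input", 2);
+//   runner.run();
+//   runner.save_outputs("model.circle.output");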
+class PModelsRunner
+{
+public:
+ PModelsRunner() = default;
+
+public:
+ bool load_config(const std::string &filename);
+ void load_inputs(const std::string &input_prefix, int32_t num_inputs);
+ bool run(void);
+ void save_outputs(const std::string &output_file);
+
+private:
+ bool is_input_ready(const RunModel &model);
+
+private:
+ crew::PConfig _pconfig;
+ RunModels _models_to_run;
+ Buffers _data_stage;
+};
+
+} // namespace prunner
+
+#endif // __CIRCLE_PRUNNER_PMODELS_RUNNER_H__
--- /dev/null
+#
+# this project validates partitioned models produced by circle-partitioner
+# with circle-part-driver and two scripts: part_eval_all.sh and part_eval_one.py
+#
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+unset(RECIPE_LIST)
+unset(PARTITION_LIST)
+unset(TEST_DEPS)
+
+macro(add RECIPE_NAME PARTITION_NAME)
+ list(APPEND RECIPE_LIST ${RECIPE_NAME})
+ list(APPEND PARTITION_LIST ${PARTITION_NAME})
+endmacro(add)
+
+# Read "test.lst"
+include("test.lst")
+
+list(LENGTH RECIPE_LIST RECIPE_LENGTH)
+math(EXPR RECIPE_LENGTH_M1 "${RECIPE_LENGTH} - 1")
+
+foreach(IDX RANGE ${RECIPE_LENGTH_M1})
+ list(GET RECIPE_LIST ${IDX} RECIPE_NAME)
+ list(GET PARTITION_LIST ${IDX} PARTITION_NAME)
+
+ # NOTE about the name:
+ # Use '.recipe' name for source tflite and circle files
+ # Use '.part' name for actual test folder and test files
+
+ # Output to a folder
+ set(PARTITIONER_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PARTITION_NAME}")
+
+ add_custom_command(OUTPUT ${PARTITIONER_OUTPUT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${PARTITIONER_OUTPUT_PATH}"
+ COMMENT "Make directory ${PARTITIONER_OUTPUT_PATH}"
+ )
+
+ # Copy tflite
+ set(TFLITE_SRC_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE_NAME}.tflite")
+ set(TFLITE_DST_PATH "${PARTITIONER_OUTPUT_PATH}/${PARTITION_NAME}.tflite")
+
+ add_custom_command(OUTPUT ${TFLITE_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TFLITE_SRC_PATH}" "${TFLITE_DST_PATH}"
+ DEPENDS ${TFLITE_SRC_PATH}
+ COMMENT "Copy ${RECIPE_NAME}.tflite"
+ )
+ list(APPEND TEST_DEPS ${TFLITE_DST_PATH})
+
+ # Copy circle
+ set(CIRCLE_SRC_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE_NAME}.circle")
+ set(CIRCLE_DST_PATH "${PARTITIONER_OUTPUT_PATH}/${PARTITION_NAME}.circle")
+
+ add_custom_command(OUTPUT ${CIRCLE_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${CIRCLE_SRC_PATH}" "${CIRCLE_DST_PATH}"
+ DEPENDS ${CIRCLE_SRC_PATH}
+ COMMENT "Copy ${RECIPE_NAME}.circle"
+ )
+ list(APPEND TEST_DEPS ${CIRCLE_DST_PATH})
+
+ # Copy .part
+ set(PART_FILE "${PARTITION_NAME}.part")
+ set(PART_SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/parts/${PART_FILE}")
+ set(PART_DST_PATH "${PARTITIONER_OUTPUT_PATH}/${PART_FILE}")
+
+ add_custom_command(OUTPUT ${PART_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${PART_SRC_PATH}" "${PART_DST_PATH}"
+ DEPENDS ${PART_SRC_PATH}
+ COMMENT "Copy ${PART_FILE}"
+ )
+ list(APPEND TEST_DEPS ${PART_DST_PATH})
+
+ # Partition connection file to generate
+ set(PARTITIONER_CONN_JSON "${PARTITIONER_OUTPUT_PATH}/${PARTITION_NAME}.conn.json")
+
+ # Run partitioner
+ add_custom_command(OUTPUT ${PARTITIONER_CONN_JSON}
+ COMMAND circle_partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}"
+ DEPENDS circle_partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH}
+    COMMENT "Partition ${RECIPE_NAME}.circle with ${PART_FILE}"
+ )
+ list(APPEND TEST_DEPS ${PARTITIONER_CONN_JSON})
+endforeach(IDX)
+
+add_custom_target(circle_part_value_test_prepare ALL DEPENDS ${TEST_DEPS})
+add_dependencies(circle_part_value_test_prepare common_artifacts_deps)
+
+# run evaluation
+add_test(NAME circle_part_value_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${NNCC_OVERLAY_DIR}/venv_2_3_0"
+ "$<TARGET_FILE:circle_part_driver>"
+ ${PARTITION_LIST}
+)
--- /dev/null
+# circle-part-value-test
+
+_circle-part-value-test_ evaluates partitioned models produced by circle-partitioner.
+
+### Process of evaluation
+
+The evaluation process is similar to that of _luci-value-test_.
+
+1) generates random input and stores it to reference input file(s)
+2) executes the tflite file from common-artifacts for reference output
+3) partitions the circle file with the .part file and writes the partitioned models into the output folder
+4) executes the produced partitioned circle models with the reference input file(s), as shown below
+5) saves the output(s) of the circle models to file(s)
+6) compares the reference output with the saved output file(s)
+7) fails the test if the values differ
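+
+For reference, step 4) runs the driver once per test roughly as follows (file names follow the
+test naming convention and are illustrative):
+
+```
+circle_part_driver <name>.conn.ini <num_inputs> <name>.circle.input <name>.circle.output
+```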
--- /dev/null
+#!/bin/bash
+
+# This script verifies the basic behavior of circle-partitioner
+#
+# HOW TO USE
+#
+# ./part_eval_all.sh <path/to/work_dir> <path/to/venv_dir> <path/to/driver> <TEST 1> <TEST 2> ...
+#
+# work_dir : artifacts directory where test materials exist
+# venv_dir : python virtual environment home directory
+# driver   : path to circle-part-driver executable
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/part_eval_one.py"
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+CIRCLE_PART_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+  # for simplicity, the folder uses the same name as ${TESTCASE}
+ TESTCASE_FOLDER="${WORKDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TESTCASE_FOLDER}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TESTCASE_FOLDER}.log" <(
+ exec 2>&1
+ set -ex
+
+ # chdir into the folder as ini has relative filename of the model
+ pushd ${TESTCASE_FOLDER}
+
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${CIRCLE_PART_DRIVER_PATH}" \
+ --name "${TESTCASE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+
+ popd
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
--- /dev/null
+#!/usr/bin/env python3
+import numpy as np
+import tensorflow as tf
+import subprocess
+import argparse
+import traceback
+
+#
+# This script compares the execution results of the TFLite interpreter and the
+# partitioned model(s) produced from a circle model
+#
+# Basic usage for example:
+# part_eval_one.py \
+# --driver build/compiler/circle-part-driver/circle-part-driver \
+# --name test_file
+#
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--name', type=str, required=True)
+args = parser.parse_args()
+
+driver = args.driver
+tflite_model = args.name + ".tflite"
+circle_model = args.name + ".circle"
+partition_conn_ini = args.name + ".conn.ini"
+
+# Build TFLite interpreter.
+interpreter = tf.lite.Interpreter(tflite_model)
+interpreter.allocate_tensors()
+
+# Generate random input data.
+num_inputs = len(interpreter.get_input_details())
+for i in range(num_inputs):
+ input_details = interpreter.get_input_details()[i]
+ if input_details["dtype"] == np.float32:
+ input_data = np.array(
+ np.random.random_sample(input_details["shape"]), input_details["dtype"])
+ elif input_details["dtype"] == np.uint8:
+ input_data = np.array(
+ np.random.randint(0, 256, size=input_details["shape"]),
+ input_details["dtype"])
+ elif input_details["dtype"] == np.bool_:
+ input_data = np.array(
+ np.random.choice(a=[True, False], size=input_details["shape"]),
+ input_details["dtype"])
+ else:
+ raise SystemExit("Unsupported input dtype")
+
+ interpreter.set_tensor(input_details["index"], input_data)
+ input_data.tofile(circle_model + ".input" + str(i))
+
+# Do inference
+interpreter.invoke()
+
+# Execute circle-part-driver.
+partition_command = [
+ driver, partition_conn_ini,
+ str(num_inputs), circle_model + ".input", circle_model + ".output"
+]
+print("Run: ")
+for arg in partition_command:
+ print(" ", arg, "\\")
+print("", flush=True)
+
+subprocess.run(partition_command, check=True)
+
+# Compare the results.
+for idx in range(len(interpreter.get_output_details())):
+ output_details = interpreter.get_output_details()[idx]
+ output_data = np.fromfile(circle_model + ".output" + str(idx),
+ output_details["dtype"])
+ shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ luci_output_data = np.reshape(output_data, output_shape)
+ try:
+ if output_details["dtype"] == np.uint8:
+ if np.allclose(
+ luci_output_data,
+ interpreter.get_tensor(
+ interpreter.get_output_details()[idx]["index"]),
+ rtol=0,
+ atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.float32:
+ if np.allclose(
+ luci_output_data,
+ interpreter.get_tensor(
+ interpreter.get_output_details()[idx]["index"]),
+ rtol=1.e-5,
+ atol=1.e-5) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int64:
+ if np.allclose(
+ luci_output_data,
+ interpreter.get_tensor(
+ interpreter.get_output_details()[idx]["index"]),
+ rtol=0,
+ atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int32:
+ if np.allclose(
+ luci_output_data,
+ interpreter.get_tensor(
+ interpreter.get_output_details()[idx]["index"]),
+ rtol=0,
+ atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ else:
+ raise SystemExit("Unsupported data type: ", output_details["dtype"])
+ except:
+ print(traceback.format_exc())
+ quit(255)
+
+quit(0)
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SUB=acl_cl
+DIV=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+DIV=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+RSQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SUB=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+WWW=acl_cl
--- /dev/null
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
--- /dev/null
+require("common-artifacts")
+require("circle-partitioner")
+require("circle-part-driver")
--- /dev/null
+# Add recipe names from /res/TensorFlowLiteRecipes to test.
+# Only add items that exist in the common-artifacts test: tflite/circle files are copied as sources.
+#
+# add(RECIPE_NAME PARTITION_NAME)
+
+add(Part_Add_Sub_000 Part_Add_Sub_000)
+add(Part_Sqrt_Rsqrt_000 Part_Sqrt_Rsqrt_000)
+add(Part_Sqrt_Rsqrt_001 Part_Sqrt_Rsqrt_001)
+add(Part_Sqrt_Rsqrt_002 Part_Sqrt_Rsqrt_002)
+add(Part_Sqrt_Rsqrt_003 Part_Sqrt_Rsqrt_003)
+add(Part_Sqrt_Rsqrt_Add_000 Part_Sqrt_Rsqrt_Add_000)
+add(Part_Sqrt_Rsqrt_Add_001 Part_Sqrt_Rsqrt_Add_001)
+add(Part_Sqrt_Rsqrt_Add_002 Part_Sqrt_Rsqrt_Add_002)
+add(Part_Sqrt_Rsqrt_Add_003 Part_Sqrt_Rsqrt_Add_003)
+add(Part_Sqrt_Rsqrt_Add_004 Part_Sqrt_Rsqrt_Add_004)
+add(Part_Add_Sqrt_000 Part_Add_Sqrt_000)
+add(Part_Add_Sqrt_Rsqrt_000 Part_Add_Sqrt_Rsqrt_000)
+add(Net_InstanceNorm_003 Net_InstanceNorm_003)
+add(Net_InstanceNorm_003 Net_InstanceNorm_003.001)
+add(Net_InstanceNorm_003 Net_InstanceNorm_003.002)
--- /dev/null
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(circle_partitioner "${SOURCES}")
+target_link_libraries(circle_partitioner foder)
+target_link_libraries(circle_partitioner crew)
+target_link_libraries(circle_partitioner safemain)
+target_link_libraries(circle_partitioner luci_lang)
+target_link_libraries(circle_partitioner luci_log)
+target_link_libraries(circle_partitioner luci_import)
+target_link_libraries(circle_partitioner luci_service)
+target_link_libraries(circle_partitioner luci_export)
+target_link_libraries(circle_partitioner luci_partition)
+target_link_libraries(circle_partitioner arser)
+target_link_libraries(circle_partitioner vconone)
+target_link_libraries(circle_partitioner nncc_common)
+
+install(TARGETS circle_partitioner DESTINATION bin)
--- /dev/null
+# circle-partitioner
+
+_circle-partitioner_ partitions a circle model into two or more circle models.
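+
+## Example
+
+The sketch below is illustrative only (file and model names are made up). A partition
+file assigns operators to backends by OPCODE; every other operator falls back to the
+`default` backend:
+
+```ini
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
+```
+
+The driver takes three positional arguments, `partition`, `input` and `work`, where the
+partition file and the input circle model are both read from the work folder:
+
+```
+$ circle_partitioner Net_Add_000.part Net_Add_000.circle work_folder
+```
+
+`--backends` and `--default` can override the corresponding values of the partition file.
+The tool writes one circle model per partitioned group into the work folder, along with
+`.conn.json` and `.conn.ini` files that describe how the partitioned models connect.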
--- /dev/null
+require("foder")
+require("crew")
+require("safemain")
+require("luci")
+require("arser")
+require("vconone")
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionRead.h"
+#include "PartitionExport.h"
+#include "HelperPath.h"
+#include "HelperStrings.h"
+
+#include <foder/FileLoader.h>
+
+#include <luci/Importer.h>
+#include <luci/Service/Validate.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+#include <luci/Log.h>
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+#include <iostream>
+#include <string>
+
+namespace
+{
+
+const char *opt_bks = "--backends";
+const char *opt_def = "--default";
+const char *opt_part = "partition";
+const char *opt_input = "input";
+const char *opt_work = "work";
+
+void print_version(void)
+{
+ std::cout << "circle-partitioner version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+void build_arser(arser::Arser &arser)
+{
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
+ arser.add_argument(opt_bks)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Backends in CSV to use for partitioning");
+
+ arser.add_argument(opt_def)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Default backend to assign");
+
+ arser.add_argument(opt_part)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Partition file which provides backend to assign");
+ arser.add_argument(opt_input)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Input circle model filename");
+ arser.add_argument(opt_work)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Work folder of partition, input files exist and output files are produced");
+}
+
+std::unique_ptr<luci::Module> load_model(const std::string &input_path)
+{
+ // Load model from the file
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data = file_loader.load();
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ // Import from input Circle file
+ luci::Importer importer;
+ return importer.importModule(circle_model);
+}
+
+bool validate_module(luci::Module *module)
+{
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+ if (!luci::validate(graph))
+ {
+ std::cerr << "ERROR: Invalid circle model" << std::endl;
+ return false;
+ }
+ if (!luci::validate_name(graph))
+ {
+ std::cerr << "ERROR: circle model has empty name" << std::endl;
+ return false;
+ }
+ }
+
+ if (!luci::validate_unique_name(module))
+ {
+ std::cerr << "ERROR: circle model has duplicate names" << std::endl;
+ return false;
+ }
+
+ return true;
+}
+
+bool validate_partition(luci::PartitionTable &partition)
+{
+ if (partition.groups.size() == 0)
+ {
+ std::cerr << "There is no 'backends' information";
+ return false;
+ }
+ if (partition.default_group.empty())
+ {
+ std::cerr << "There is no 'default' backend information";
+ return false;
+ }
+ if (!partee::is_one_of(partition.default_group, partition.groups))
+ {
+ std::cerr << "'default' backend is not one of 'backends' item";
+ return false;
+ }
+ for (auto &byopcode : partition.byopcodes)
+ {
+ if (!partee::is_one_of(byopcode.second, partition.groups))
+ {
+ std::cerr << "OPCODE " << byopcode.first << " is not assigned to one of 'backends' items";
+ return false;
+ }
+ }
+ return true;
+}
+
+void dump(std::ostream &os, const luci::PartitionTable &table)
+{
+ os << "Backends:";
+ for (auto &group : table.groups)
+ {
+ os << " " << group;
+ if (table.default_group == group)
+ os << "(default)";
+ }
+ os << std::endl;
+
+ os << "Assign by OPCODE: " << std::endl;
+ for (auto &item : table.byopcodes)
+ os << " " << item.first << "=" << item.second << std::endl;
+}
+
+std::ostream &operator<<(std::ostream &os, const luci::PartitionTable &table)
+{
+ dump(os, table);
+ return os;
+}
+
+} // namespace
+
+int entry(int argc, char **argv)
+{
+ LOGGER(l);
+
+ arser::Arser arser("circle-partitioner provides circle model partitioning");
+
+ build_arser(arser);
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ std::cerr << arser;
+ return EXIT_FAILURE;
+ }
+
+ std::string partition_file = arser.get<std::string>(opt_part);
+ std::string input_file = arser.get<std::string>(opt_input);
+ std::string work_folder = arser.get<std::string>(opt_work);
+
+ std::string partition_path = work_folder + "/" + partition_file;
+ std::string input_path = work_folder + "/" + input_file;
+
+ auto module = load_model(input_path);
+ if (module.get() == nullptr)
+ {
+ return EXIT_FAILURE;
+ }
+ if (!validate_module(module.get()))
+ {
+ return EXIT_FAILURE;
+ }
+
+ // Read partition information
+ INFO(l) << "--- Read PartitionConfig-----------------------" << std::endl;
+ auto partition = partee::read(partition_path);
+ INFO(l) << partition << std::endl;
+
+ // override with command line arguments
+ {
+ if (arser[opt_bks])
+ {
+ auto backend_backends = arser.get<std::string>(opt_bks);
+ partition.groups = partee::csv_to_vector<std::string>(backend_backends);
+ }
+ if (arser[opt_def])
+ {
+ partition.default_group = arser.get<std::string>(opt_def);
+ }
+ }
+ if (!validate_partition(partition))
+ {
+ return EXIT_FAILURE;
+ }
+
+ INFO(l) << "--- PartitionConfig final----------------------" << std::endl;
+ INFO(l) << partition << std::endl;
+
+ // apply partition to module
+ auto pms = luci::apply(module.get(), partition);
+
+ // validate partitioned modules
+ for (auto &pmodule : pms.pmodules)
+ {
+ for (size_t g = 0; g < pmodule.module->size(); ++g)
+ {
+ auto graph = pmodule.module->graph(g);
+ if (graph == nullptr)
+ {
+ std::cerr << "ERROR: Failed to create partition model" << std::endl;
+ return EXIT_FAILURE;
+ }
+ if (!luci::validate(graph))
+ {
+ std::cerr << "ERROR: Failed to create partition model" << std::endl;
+ return EXIT_FAILURE;
+ }
+ }
+ }
+
+ INFO(l) << "--- Partition Export---------------------------" << std::endl;
+ uint32_t idx = 1;
+ for (auto &pmodule : pms.pmodules)
+ {
+ // Export to output circle file
+ luci::CircleExporter exporter;
+
+ auto output_path = partee::make_path(work_folder, input_path, idx, pmodule.group);
+ pmodule.name = partee::get_filename_ext(output_path);
+ INFO(l) << "--- " << output_path << ": " << pmodule.name << std::endl;
+
+ luci::CircleFileExpContract contract(pmodule.module.get(), output_path);
+ if (!exporter.invoke(&contract))
+ {
+ std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+ idx++;
+ }
+
+ INFO(l) << "--- Partition connection information-----------" << std::endl;
+ if (!partee::export_part_conn_json(work_folder, input_file, module.get(), pms))
+ {
+ return EXIT_FAILURE;
+ }
+ if (!partee::export_part_conn_ini(work_folder, input_file, module.get(), pms))
+ {
+ return EXIT_FAILURE;
+ }
+
+ INFO(l) << "--- Partition done-----------------------------" << std::endl << std::endl;
+
+ return EXIT_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HelperPath.h"
+
+#include <cassert>
+#include <sstream>
+#include <stdlib.h>
+
+namespace partee
+{
+
+bool make_dir(const std::string &path)
+{
+ std::string command("mkdir -p ");
+ command += path;
+ int ret = ::system(command.c_str());
+ return ret == 0;
+}
+
+std::string get_filename_ext(const std::string &base)
+{
+ // find last '/' to get filename.ext
+ auto pos = base.find_last_of("/");
+ if (pos == std::string::npos)
+ return base;
+
+ return base.substr(pos + 1);
+}
+
+std::string make_path(const std::string &base, const std::string &input, uint32_t idx,
+ const std::string &backend)
+{
+ auto filename_ext = get_filename_ext(input);
+
+ // We will assume file type .circle if not given
+ // TODO maybe throw if there is no extension?
+ std::string filename = filename_ext;
+ std::string ext = "circle";
+
+ auto pos = filename_ext.find_last_of(".");
+ if (pos != std::string::npos)
+ {
+ filename = filename_ext.substr(0, pos);
+ ext = filename_ext.substr(pos + 1);
+ }
+
+ // format idx with 5 '0' paddings like '00123'
+ uint32_t length = 5;
+ auto seq = std::string(length, '0').append(std::to_string(idx));
+ auto seq_fmt = seq.substr(seq.size() - length);
+
+ return base + "/" + filename + "." + seq_fmt + "_" + backend + "." + ext;
+}
+
+} // namespace partee
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_HELPER_PATH_H__
+#define __CIRCLE_HELPER_PATH_H__
+
+#include <string>
+
+namespace partee
+{
+
+/**
+ * @brief create folder
+ */
+bool make_dir(const std::string &path);
+
+/**
+ * @brief get filename part of base
+ */
+std::string get_filename_ext(const std::string &base);
+
+/**
+ * @brief Make file path from base and backend
+ */
+std::string make_path(const std::string &base, const std::string &input, uint32_t idx,
+ const std::string &backend);
+
+} // namespace partee
+
+#endif // __CIRCLE_HELPER_PATH_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HelperStrings.h"
+
+#include <algorithm>
+#include <sstream>
+
+namespace partee
+{
+
+template <> std::vector<std::string> csv_to_vector(const std::string &str)
+{
+ std::vector<std::string> ret;
+ std::istringstream is(str);
+ for (std::string item; std::getline(is, item, ',');)
+ {
+ ret.push_back(item);
+ }
+ return ret;
+}
+
+bool is_one_of(const std::string &item, const std::vector<std::string> &items)
+{
+ return std::find(items.begin(), items.end(), item) != items.end();
+}
+
+} // namespace partee
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_HELPER_STRINGS_H__
+#define __CIRCLE_HELPER_STRINGS_H__
+
+#include <string>
+#include <vector>
+
+namespace partee
+{
+
+template <typename T> std::vector<T> csv_to_vector(const std::string &str);
+
+bool is_one_of(const std::string &item, const std::vector<std::string> &items);
+
+} // namespace partee
+
+#endif // __CIRCLE_HELPER_STRINGS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionExport.h"
+#include "HelperPath.h"
+
+#include <crew/PConfig.h>
+
+#include <cassert>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+std::string export_file_path(const std::string &output_base, const std::string &input,
+ const std::string &ext)
+{
+ auto filename_ext = partee::get_filename_ext(input);
+ auto pos = filename_ext.find_last_of(".");
+ assert(pos > 0);
+ auto filename = filename_ext.substr(0, pos);
+ auto filepath = output_base + "/" + filename + ".conn" + ext;
+ return filepath;
+}
+
+} // namespace
+
+namespace
+{
+
+void graph_io_to_config_part(loco::Graph *graph, crew::Part &part)
+{
+ assert(graph != nullptr);
+
+ auto *gis = graph->inputs();
+ auto *gos = graph->outputs();
+ for (uint32_t i = 0; i < gis->size(); ++i)
+ {
+ auto *gi = gis->at(i);
+ assert(gi != nullptr);
+ part.inputs.push_back(gi->name());
+ }
+ for (uint32_t i = 0; i < gos->size(); ++i)
+ {
+ auto *go = gos->at(i);
+ assert(go != nullptr);
+ part.outputs.push_back(go->name());
+ }
+}
+
+void pms2config(const luci::PartedModules &pms, crew::PConfig &pconfig)
+{
+ for (auto &pmodule : pms.pmodules)
+ {
+ auto *graph = pmodule.module->graph();
+
+ crew::Part part;
+ part.model_file = pmodule.name;
+ graph_io_to_config_part(graph, part);
+
+ pconfig.parts.push_back(part);
+ }
+}
+
+} // namespace
+
+namespace partee
+{
+
+bool export_part_conn_json(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms)
+{
+ crew::PConfig pconfig;
+
+ // TODO is using graph I/O of the main graph enough?
+ auto *graph = source->graph();
+
+ pconfig.source.model_file = input;
+ graph_io_to_config_part(graph, pconfig.source);
+
+ pms2config(pms, pconfig);
+
+ auto filepath_json = export_file_path(output_base, input, ".json");
+ std::ofstream fs(filepath_json.c_str(), std::ofstream::binary | std::ofstream::trunc);
+ if (not fs.good())
+ {
+ std::cerr << "ERROR: Failed to create file: " << filepath_json;
+ return false;
+ }
+ if (not write_json(fs, pconfig))
+ {
+ std::cerr << "ERROR: Failed to write json file: " << filepath_json;
+ return false;
+ }
+ fs.close();
+
+ return true;
+}
+
+bool export_part_conn_ini(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms)
+{
+ crew::PConfig pconfig;
+
+ // TODO is using graph I/O of the main graph enough?
+ auto *graph = source->graph();
+
+ pconfig.source.model_file = input;
+ graph_io_to_config_part(graph, pconfig.source);
+
+ pms2config(pms, pconfig);
+
+ auto filepath_ini = export_file_path(output_base, input, ".ini");
+ std::ofstream fs(filepath_ini.c_str(), std::ofstream::binary | std::ofstream::trunc);
+ if (not fs.good())
+ {
+ std::cerr << "ERROR: Failed to create file: " << filepath_ini;
+ return false;
+ }
+ if (not write_ini(fs, pconfig))
+ {
+ std::cerr << "ERROR: Failed to write ini file: " << filepath_ini;
+ return false;
+ }
+ fs.close();
+
+ return true;
+}
+
+} // namespace partee
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_PARTITION_EXPORT_H__
+#define __CIRCLE_PARTITION_EXPORT_H__
+
+#include <luci/Partition.h>
+
+#include <string>
+
+namespace partee
+{
+
+/**
+ * @brief This will save partition connection to json format file
+ */
+bool export_part_conn_json(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms);
+
+/**
+ * @brief This will save partition connection to ini format file
+ */
+bool export_part_conn_ini(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms);
+
+} // namespace partee
+
+#endif // __CIRCLE_PARTITION_EXPORT_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionRead.h"
+#include "HelperStrings.h"
+
+#include <crew/PConfigIni.h>
+#include <crew/PConfigIniDump.h>
+#include <luci/Log.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+using namespace partee;
+
+const char *_section_partition = "partition";
+const char *_section_OPCODE = "OPCODE";
+
+const char *_key_backends = "backends";
+const char *_key_default = "default";
+const char *_key_underscore = "_";
+
+luci::PartitionTable parse_table(const crew::Sections &sections)
+{
+ luci::PartitionTable table;
+
+ for (auto &section : sections)
+ {
+ if (section.name == _section_partition)
+ {
+ auto &items = section.items;
+ if (items.find(_key_backends) == items.end())
+ {
+ throw std::invalid_argument("'backends' is required");
+ }
+ if (items.find(_key_default) == items.end())
+ {
+ throw std::invalid_argument("'default' is required");
+ }
+
+ table.groups = csv_to_vector<std::string>(items.at(_key_backends));
+ table.default_group = items.at(_key_default);
+ }
+ else if (section.name == _section_OPCODE)
+ {
+ auto &items = section.items;
+
+ for (auto &item : items)
+ {
+ if (item.first == _key_underscore)
+ table.default_group = item.second;
+ else
+ {
+ table.byopcodes.emplace(item.first, item.second);
+ }
+ }
+ }
+ }
+
+ return table;
+}
+
+} // namespace
+
+namespace partee
+{
+
+luci::PartitionTable read(const std::string &path)
+{
+ LOGGER(l);
+
+ INFO(l) << "PartitionConfig: " << path << std::endl;
+
+ auto partition_config = crew::read_ini(path);
+
+ INFO(l) << partition_config << std::endl;
+
+ auto partition_table = parse_table(partition_config);
+
+ return partition_table;
+}
+
+} // namespace partee
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_PARTITION_READ_H__
+#define __CIRCLE_PARTITION_READ_H__
+
+#include <luci/IR/Module.h>
+#include <luci/Partition.h>
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace partee
+{
+
+/**
+ * @brief Read and parse the partition file and return a PartitionTable
+ */
+luci::PartitionTable read(const std::string &path);
+
+} // namespace partee
+
+#endif // __CIRCLE_PARTITION_READ_H__
target_link_libraries(circle-quantizer luci_service)
target_link_libraries(circle-quantizer luci_pass)
target_link_libraries(circle-quantizer luci_export)
+target_link_libraries(circle-quantizer luci_env)
target_link_libraries(circle-quantizer arser)
target_link_libraries(circle-quantizer vconone)
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
+#include <luci/UserSettings.h>
#include <oops/InternalExn.h>
#include <arser/arser.h>
luci::CircleOptimizer optimizer;
auto options = optimizer.options();
+ auto settings = luci::UserSettings::settings();
const std::string qdqw = "--quantize_dequantize_weights";
const std::string qwmm = "--quantize_with_minmax";
const std::string rq = "--requantize";
+ const std::string gpd = "--generate_profile_data";
+
arser::Arser arser("circle-quantizer provides circle model quantization");
arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
arser.add_argument(qdqw)
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .required(false)
- .help("Quantize-dequantize weight values required action before quantization. "
- "Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer, channel)");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .required(false)
+ .help("Quantize-dequantize weight values required action before quantization. "
+ "Three arguments required: input_dtype(float32) "
+ "output_dtype(uint8) granularity(layer, channel)");
arser.add_argument(qwmm)
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .required(false)
- .help("Quantize with min/max values. "
- "Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer, channel)");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .required(false)
+ .help("Quantize with min/max values. "
+ "Three arguments required: input_dtype(float32) "
+ "output_dtype(uint8) granularity(layer, channel)");
arser.add_argument(rq)
- .nargs(2)
- .type(arser::DataType::STR_VEC)
- .required(false)
- .help("Requantize a quantized model. "
- "Two arguments required: input_dtype(int8) "
- "output_dtype(uint8)");
+ .nargs(2)
+ .type(arser::DataType::STR_VEC)
+ .required(false)
+ .help("Requantize a quantized model. "
+ "Two arguments required: input_dtype(int8) "
+ "output_dtype(uint8)");
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser.add_argument(gpd).nargs(0).required(false).default_value(false).help(
+ "This will turn on profiling data generation.");
+
try
{
arser.parse(argc, argv);
return 255;
}
- if (arser[qdqw])
{
- if (arser[qwmm] || arser[rq])
+ // only one of the qdqw, qwmm, rq options can be used
+ int32_t opt_used = arser[qdqw] ? 1 : 0;
+ opt_used += arser[qwmm] ? 1 : 0;
+ opt_used += arser[rq] ? 1 : 0;
+ if (opt_used != 1)
{
print_exclusive_options();
return 255;
}
+ }
+
+ if (arser[qdqw])
+ {
auto values = arser.get<std::vector<std::string>>(qdqw);
if (values.size() != 3)
{
if (arser[qwmm])
{
- if (arser[qdqw] || arser[rq])
- {
- print_exclusive_options();
- return 255;
- }
auto values = arser.get<std::vector<std::string>>(qwmm);
if (values.size() != 3)
{
if (arser[rq])
{
- if (arser[qwmm] || arser[qdqw])
- {
- print_exclusive_options();
- return 255;
- }
auto values = arser.get<std::vector<std::string>>(rq);
if (values.size() != 2)
{
std::string input_path = arser.get<std::string>("input");
std::string output_path = arser.get<std::string>("output");
+ if (arser[gpd])
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
+
// Load model from the file
foder::FileLoader file_loader{input_path};
std::vector<char> model_data = file_loader.load();
int entry(int argc, char **argv)
{
arser::Arser arser{
- "circle-tensordump allows users to retrieve tensor information from a Circle model file"};
+ "circle-tensordump allows users to retrieve tensor information from a Circle model file"};
arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Circle file path to dump");
arser.add_argument("--tensors").nargs(0).help("Dump to console");
arser.add_argument("--tensors_to_hdf5")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Dump to hdf5 file. Specify hdf5 file path to be dumped");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Dump to hdf5 file. Specify hdf5 file path to be dumped");
try
{
return;
auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
auto dataset = std::make_unique<H5::DataSet>(
- file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
+ file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
dataset->write(data->data(), type);
}
{
auto dataspace = std::make_unique<H5::DataSpace>(H5S_SCALAR);
auto dataset = std::make_unique<H5::DataSet>(
- file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
+ file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
dataset->write(&data, type);
}
// create a group for each tensor whose name is its tensor name
std::string group_name = ::mangle(tensor->name()->c_str());
std::unique_ptr<H5::Group> tensor_group =
- std::make_unique<H5::Group>(file.createGroup(group_name));
+ std::make_unique<H5::Group>(file.createGroup(group_name));
// write a buffer data
uint32_t buff_idx = tensor->buffer();
## TFLITE RECIPE
Add(Net_Preactivation_BN_000 PASS fuse_preactivation_batchnorm)
+Add(Net_BroadcastTo_AddV2_000 PASS resolve_customop_add)
+Add(Net_BroadcastTo_AddV2_001 PASS resolve_customop_add)
+Add(Net_Conv_Add_Mul_000 PASS fuse_batchnorm_with_conv)
+Add(Net_Conv_Add_Mul_001 PASS fuse_batchnorm_with_conv)
+Add(Net_Conv_Add_Mul_002 PASS fuse_batchnorm_with_conv)
+Add(Net_Conv_Min_Max_000 PASS transform_min_max_to_relu6)
+Add(Net_Conv_Relu6_000 PASS fuse_activation_function)
+Add(Net_DwConv_BN_000 PASS fuse_batchnorm_with_dwconv)
+Add(Net_DwConv_BN_001 PASS fuse_batchnorm_with_dwconv)
+Add(Net_Reshape_Reshape_000 PASS remove_redundant_reshape)
+Add(Net_Squeeze_Squeeze_000 PASS substitute_squeeze_to_reshape)
Add(Net_TConv_Add_000 PASS fuse_add_with_tconv)
Add(Net_TConv_Add_001 PASS fuse_add_with_tconv)
Add(Net_TConv_Add_002 PASS fuse_add_with_tconv)
Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv)
Add(Net_TConv_BN_001 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_002 PASS fuse_batchnorm_with_tconv)
Add(Net_InstanceNorm_001 PASS fuse_instnorm)
Add(Net_InstanceNorm_002 PASS fuse_instnorm)
Add(Net_InstanceNorm_003 PASS fuse_instnorm)
+Add(Net_Maximum_Minimum_000 PASS transform_min_max_to_relu6)
Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
Add(MatMul_000 PASS resolve_customop_matmul)
Add(DepthwiseConv2D_003 PASS)
arser::Arser arser("circle2circle provides circle model optimization and transformations");
arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("--all").nargs(0).required(false).default_value(false).help(
- "Enable all optimize options");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
+ arser.add_argument("--O1").nargs(0).required(false).default_value(false).help(
+ "Enable O1 optimize options");
+
+ arser.add_argument("--fold_add_v2")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fold AddV2 operators with constant inputs");
+
+ arser.add_argument("--fold_cast")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fold Cast operators with constant input");
arser.add_argument("--fold_dequantize")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold dequantize op");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fold dequantize op");
+
+ arser.add_argument("--fold_sparse_to_dense")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fold SparseToDense operator");
+
+ arser.add_argument("--forward_reshape_to_unaryop")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will move Reshape after UnaryOp for centain condition");
arser.add_argument("--fuse_activation_function")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Activation function to a preceding operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse Activation function to a preceding operator");
arser.add_argument("--fuse_add_with_tconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Add operator to Transposed Convolution operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse Add operator to Transposed Convolution operator");
+
+ arser.add_argument("--fuse_batchnorm_with_conv")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse BatchNorm operators to Convolution operator");
+
+ arser.add_argument("--fuse_batchnorm_with_dwconv")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse BatchNorm operators to Depthwise Convolution operator");
arser.add_argument("--fuse_batchnorm_with_tconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators to Transposed Convolution operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse BatchNorm operators to Transposed Convolution operator");
arser.add_argument("--fuse_bcq")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse operators and apply Binary Coded Quantization");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse operators and apply Binary Coded Quantization");
arser.add_argument("--fuse_instnorm")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse operators to InstanceNorm operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse operators to InstanceNorm operator");
arser.add_argument("--make_batchnorm_gamma_positive")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will make negative gamma of BatchNorm into a small positive value (1e-10). Note "
- "that this pass can change the execution result of the model. So, use it only when the "
- "impact is known to be acceptable.");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will make negative gamma of BatchNorm into a small positive value (1e-10). Note "
+ "that this pass can change the execution result of the model. So, use it only when the "
+ "impact is known to be acceptable.");
arser.add_argument("--fuse_preactivation_batchnorm")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators of pre-activations to Convolution operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse BatchNorm operators of pre-activations to Convolution operator");
+
+ arser.add_argument("--remove_redundant_reshape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse or remove subsequent Reshape operators");
arser.add_argument("--remove_redundant_transpose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse or remove subsequent Transpose operators");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse or remove subsequent Transpose operators");
+
+ arser.add_argument("--remove_unnecessary_reshape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will remove unnecessary reshape operators");
+
+ arser.add_argument("--remove_unnecessary_slice")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will remove unnecessary slice operators");
+
+ arser.add_argument("--remove_unnecessary_strided_slice")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will remove unnecessary strided slice operators");
+
+ arser.add_argument("--remove_unnecessary_split")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will remove unnecessary split operators");
arser.add_argument("--replace_cw_mul_add_with_depthwise_conv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will replace channel-wise mul/add with DepthwiseConv2D operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will replace channel-wise mul/add with DepthwiseConv2D operator");
arser.add_argument("--resolve_customop_add")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(Add) to Add operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert Custom(Add) to Add operator");
arser.add_argument("--resolve_customop_batchmatmul")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(BatchMatmul) to BatchMatmul operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert Custom(BatchMatmul) to BatchMatmul operator");
arser.add_argument("--resolve_customop_matmul")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(Matmul) to Matmul operator");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert Custom(Matmul) to Matmul operator");
arser.add_argument("--shuffle_weight_to_16x1float32")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that "
- "it only converts weights whose row is a multiple of 16");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that "
+ "it only converts weights whose row is a multiple of 16");
arser.add_argument("--substitute_pack_to_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert single input Pack to Reshape");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert single input Pack to Reshape");
+
+ arser.add_argument("--substitute_squeeze_to_reshape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert certain condition Squeeze to Reshape");
+
+ arser.add_argument("--substitute_transpose_to_reshape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert single input Transpose to Reshape");
+
+ arser.add_argument("--convert_nchw_to_nhwc")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Experimental: This will convert NCHW operators to NHWC under the assumption that "
+ "input model is NCHW.");
+
+ arser.add_argument("--nchw_to_nhwc_preserve_input_shape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Preserve the input shape of the model (argument for --convert_nchw_to_nhwc).");
+
+ arser.add_argument("--nchw_to_nhwc_preserve_output_shape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Preserve the output shape of the model (argument for --convert_nchw_to_nhwc).");
+
+ arser.add_argument("--transform_min_max_to_relu6")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Transform Minimum-Maximum pattern to Relu6 operator");
arser.add_argument("--mute_warnings")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn off warning messages");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will turn off warning messages");
arser.add_argument("--disable_validation")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn off operator validations. May help input model investigation.");
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will turn off operator validations. May help input model investigation.");
+
+ arser.add_argument("--generate_profile_data")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will turn on profiling data generation.");
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
// sparsification argument
arser.add_argument("--sparsify_tensor")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Tensor name that you want to sparsify");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Tensor name that you want to sparsify");
arser.add_argument("--sparsify_traversal_order")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .default_value("0,1,2,3")
- .help("Traversal order of dimensions. Default value: 0,1,2,3");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .default_value("0,1,2,3")
+ .help("Traversal order of dimensions. Default value: 0,1,2,3");
arser.add_argument("--sparsify_format")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .default_value("d,s")
- .help("Format of each dimension. 'd' stands for dense, 's' stands for sparse(CSR). Default "
- "value: d,s");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .default_value("d,s")
+ .help("Format of each dimension. 'd' stands for dense, 's' stands for sparse(CSR). Default "
+ "value: d,s");
arser.add_argument("--sparsify_block_size")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Size of each block dimension");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Size of each block dimension");
arser.add_argument("--sparsify_block_map")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .default_value("0,1")
- .help("Map from block dimension to the original tensor dimension. Default value: 0,1");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .default_value("0,1")
+ .help("Map from block dimension to the original tensor dimension. Default value: 0,1");
try
{
return 255;
}
- if (arser.get<bool>("--all"))
+ if (arser.get<bool>("--O1"))
{
options->enable(Algorithms::FuseBCQ);
options->enable(Algorithms::FuseInstanceNorm);
options->enable(Algorithms::RemoveRedundantTranspose);
options->enable(Algorithms::SubstitutePackToReshape);
}
+ if (arser.get<bool>("--fold_add_v2"))
+ options->enable(Algorithms::FoldAddV2);
+ if (arser.get<bool>("--fold_cast"))
+ options->enable(Algorithms::FoldCast);
if (arser.get<bool>("--fold_dequantize"))
options->enable(Algorithms::FoldDequantize);
+ if (arser.get<bool>("--fold_sparse_to_dense"))
+ options->enable(Algorithms::FoldSparseToDense);
+ if (arser.get<bool>("--forward_reshape_to_unaryop"))
+ options->enable(Algorithms::ForwardReshapeToUnaryOp);
if (arser.get<bool>("--fuse_activation_function"))
options->enable(Algorithms::FuseActivationFunction);
+ if (arser.get<bool>("--fuse_batchnorm_with_conv"))
+ options->enable(Algorithms::FuseBatchNormWithConv);
if (arser.get<bool>("--fuse_add_with_tconv"))
options->enable(Algorithms::FuseAddWithTConv);
+ if (arser.get<bool>("--fuse_batchnorm_with_dwconv"))
+ options->enable(Algorithms::FuseBatchNormWithDwConv);
if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
options->enable(Algorithms::FuseBatchNormWithTConv);
if (arser.get<bool>("--fuse_bcq"))
options->enable(Algorithms::MakeBatchNormGammaPositive);
if (arser.get<bool>("--fuse_preactivation_batchnorm"))
options->enable(Algorithms::FusePreActivationBatchNorm);
+ if (arser.get<bool>("--remove_redundant_reshape"))
+ options->enable(Algorithms::RemoveRedundantReshape);
if (arser.get<bool>("--remove_redundant_transpose"))
options->enable(Algorithms::RemoveRedundantTranspose);
+ if (arser.get<bool>("--remove_unnecessary_reshape"))
+ options->enable(Algorithms::RemoveUnnecessaryReshape);
+ if (arser.get<bool>("--remove_unnecessary_slice"))
+ options->enable(Algorithms::RemoveUnnecessarySlice);
+ if (arser.get<bool>("--remove_unnecessary_strided_slice"))
+ options->enable(Algorithms::RemoveUnnecessaryStridedSlice);
+ if (arser.get<bool>("--remove_unnecessary_split"))
+ options->enable(Algorithms::RemoveUnnecessarySplit);
if (arser.get<bool>("--replace_cw_mul_add_with_depthwise_conv"))
options->enable(Algorithms::ReplaceMulAddWithDepthwiseConv);
if (arser.get<bool>("--resolve_customop_add"))
options->enable(Algorithms::ShuffleWeightTo16x1Float32);
if (arser.get<bool>("--substitute_pack_to_reshape"))
options->enable(Algorithms::SubstitutePackToReshape);
+ if (arser.get<bool>("--substitute_squeeze_to_reshape"))
+ options->enable(Algorithms::SubstituteSqueezeToReshape);
+ if (arser.get<bool>("--substitute_transpose_to_reshape"))
+ options->enable(Algorithms::SubstituteTransposeToReshape);
+ if (arser.get<bool>("--transform_min_max_to_relu6"))
+ options->enable(Algorithms::TransformMinMaxToRelu6Pass);
if (arser.get<bool>("--mute_warnings"))
settings->set(luci::UserSettings::Key::MuteWarnings, true);
if (arser.get<bool>("--disable_validation"))
settings->set(luci::UserSettings::Key::DisableValidation, true);
+ if (arser.get<bool>("--generate_profile_data"))
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
std::string input_path = arser.get<std::string>("input");
std::string output_path = arser.get<std::string>("output");
arser.get<std::string>("--sparsify_block_map"));
}
+ if (arser.get<bool>("--convert_nchw_to_nhwc"))
+ {
+ options->enable(Algorithms::ConvertNCHWToNHWC);
+ if (arser.get<bool>("--nchw_to_nhwc_preserve_input_shape"))
+ options->param(AlgorithmParameters::NCHW_to_NHWC_preserve_input_shape, "true");
+ if (arser.get<bool>("--nchw_to_nhwc_preserve_output_shape"))
+ options->param(AlgorithmParameters::NCHW_to_NHWC_preserve_output_shape, "true");
+ }
+
// Load model from the file
foder::FileLoader file_loader{input_path};
std::vector<char> model_data;
{
assert(_ptr < N);
_argv[_ptr] = new char[strlen(in) + 1];
- strcpy(_argv[_ptr], in);
+ strncpy(_argv[_ptr], in, strlen(in) + 1);
_ptr++;
}
private:
pchar_t _argv[N] = {
- nullptr,
+ nullptr,
};
size_t _ptr = 0;
};
target_include_directories(circlechef_circle PRIVATE src)
target_link_libraries(circlechef_circle circlechef_proto)
target_link_libraries(circlechef_circle mio_circle)
-target_link_libraries(circlechef_circle stdex)
target_link_libraries(circlechef_circle cwrap)
target_link_libraries(circlechef_circle souschef)
return circlechef::UINT8;
case circle::TensorType_BOOL:
return circlechef::BOOL;
+ case circle::TensorType_INT16:
+ return circlechef::INT16;
// TODO handle other types
// TensorType_FLOAT16
// TensorType_STRING
- // TensorType_INT16
// TensorType_COMPLEX64
default:
throw std::runtime_error{"unsupported tensor type"};
file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
add_library(circlechef_core STATIC ${SOURCES})
target_include_directories(circlechef_core PUBLIC include)
target_include_directories(circlechef_core PRIVATE src)
-target_link_libraries(circlechef_core circlechef_proto)
-target_link_libraries(circlechef_core circlechef_log)
-target_link_libraries(circlechef_core mio_circle)
-target_link_libraries(circlechef_core souschef)
+target_link_libraries(circlechef_core PUBLIC circlechef_proto)
+target_link_libraries(circlechef_core PUBLIC circlechef_log)
+target_link_libraries(circlechef_core PUBLIC mio_circle)
+target_link_libraries(circlechef_core PUBLIC souschef)
+target_link_libraries(circlechef_core PRIVATE nncc_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(circlechef_core_test ${TESTS})
+target_include_directories(circlechef_core_test PRIVATE src)
+target_link_libraries(circlechef_core_test circlechef_core)
+target_link_libraries(circlechef_core_test nncc_coverage)
return circle::TensorType_INT64;
case circlechef::BOOL:
return circle::TensorType_BOOL;
+ case circlechef::INT16:
+ return circle::TensorType_INT16;
default:
break;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include <gtest/gtest.h>
+
+TEST(ConvertTest, as_circle_padding)
+{
+ ASSERT_EQ(circle::Padding_SAME, as_circle_padding(circlechef::SAME));
+ ASSERT_EQ(circle::Padding_VALID, as_circle_padding(circlechef::VALID));
+}
+
+TEST(ConvertTest, as_circle_padding_NEG)
+{
+ EXPECT_THROW(as_circle_padding(static_cast<circlechef::Padding>(99)), std::runtime_error);
+}
+
+TEST(ConvertTest, as_circle_activation)
+{
+ ASSERT_EQ(circle::ActivationFunctionType_NONE, as_circle_activation(circlechef::NONE));
+ ASSERT_EQ(circle::ActivationFunctionType_RELU, as_circle_activation(circlechef::RELU));
+ ASSERT_EQ(circle::ActivationFunctionType_RELU6, as_circle_activation(circlechef::RELU6));
+}
+
+TEST(ConvertTest, as_circle_activation_NEG)
+{
+ EXPECT_THROW(as_circle_activation(static_cast<circlechef::Activation>(99)), std::runtime_error);
+}
+
+TEST(ConvertTest, as_circle_tensortype)
+{
+ ASSERT_EQ(circle::TensorType_FLOAT32, as_circle_tensortype(circlechef::FLOAT32));
+ ASSERT_EQ(circle::TensorType_INT32, as_circle_tensortype(circlechef::INT32));
+ ASSERT_EQ(circle::TensorType_UINT8, as_circle_tensortype(circlechef::UINT8));
+ ASSERT_EQ(circle::TensorType_INT64, as_circle_tensortype(circlechef::INT64));
+ ASSERT_EQ(circle::TensorType_BOOL, as_circle_tensortype(circlechef::BOOL));
+ ASSERT_EQ(circle::TensorType_INT16, as_circle_tensortype(circlechef::INT16));
+}
+
+TEST(ConvertTest, as_circle_tensortype_NEG)
+{
+ EXPECT_THROW(as_circle_tensortype(static_cast<circlechef::TensorType>(99)), std::runtime_error);
+}
{
public:
GeneratedModelImpl(std::unique_ptr<flatbuffers::FlatBufferBuilder> &&builder)
- : _builder{std::move(builder)}
+ : _builder{std::move(builder)}
{
// DO NOTHING
}
static DataChefRegistry fp32;
static DataChefRegistry u8;
static DataChefRegistry boolean;
+ static DataChefRegistry s16;
switch (type)
{
return u8;
case circlechef::BOOL:
return boolean;
+ case circlechef::INT16:
+ return s16;
default:
break;
}
// Initialize Data Chef Registry
#define DATA_CHEF(TYPE, NAME, FACTORY_CLASS) \
data_chef_registry(::circlechef::TYPE) \
- .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
+ .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
#include <souschef/DataChef.def>
#undef DATA_CHEF
// Create FlatBufferBuilder
//
auto flatbuffer_builder =
- std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
+ std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
// Operand-related
std::vector<flatbuffers::Offset<::circle::Buffer>> buffer_vec;
// Create OperatorCode with Builtin Operator
std::map<circle::BuiltinOperator, int32_t> builtin_code_map =
- gather_builtincode_map(model_recipe);
+ gather_builtincode_map(model_recipe);
for (auto const &opcode : builtin_code_map)
{
circle::OperatorCodeBuilder code_builder{*flatbuffer_builder};
// Return "GenerateModel"
return GeneratedModel{
- std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
+ std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
}
} // namespace circlechef
circle::BCQFullyConnectedOptionsBuilder bcq_fully_connected_options_builder{fbb};
bcq_fully_connected_options_builder.add_weights_hidden_size(
- operation.bcq_fully_connected_options().weights_hidden_size());
+ operation.bcq_fully_connected_options().weights_hidden_size());
bcq_fully_connected_options_builder.add_fused_activation_function(
- as_circle_activation(operation.bcq_fully_connected_options().activation()));
+ as_circle_activation(operation.bcq_fully_connected_options().activation()));
return bcq_fully_connected_options_builder.Finish().Union();
}
circle::BCQGatherOptionsBuilder bcq_gather_options_builder{fbb};
bcq_gather_options_builder.add_input_hidden_size(
- operation.bcq_gather_options().input_hidden_size());
+ operation.bcq_gather_options().input_hidden_size());
bcq_gather_options_builder.add_axis(operation.bcq_gather_options().axis());
return bcq_gather_options_builder.Finish().Union();
circle::BatchMatMulOptionsBuilder batch_matmul_options_options_builder{fbb};
batch_matmul_options_options_builder.add_adjoint_lhs(
- operation.batch_matmul_options().adjoint_lhs());
+ operation.batch_matmul_options().adjoint_lhs());
batch_matmul_options_options_builder.add_adjoint_rhs(
- operation.batch_matmul_options().adjoint_rhs());
+ operation.batch_matmul_options().adjoint_rhs());
return batch_matmul_options_options_builder.Finish().Union();
}
UINT8 = 3;
INT64 = 4;
BOOL = 6;
+ INT16 = 7;
}
message TensorShape {
--- /dev/null
+operand {
+ name: "ifm1"
+ type: INT16
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "constant"
+ type: INT16
+ shape { dim: 1 dim: 4 dim: 3 dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "3.0"
+ arg: "10.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: INT16
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+}
+operation {
+ type: "BatchMatMul"
+ input: "ifm1"
+ input: "constant"
+ output: "ofm"
+ batch_matmul_options {
+ adjoint_lhs: false
+ adjoint_rhs: false
+ }
+}
+input: "ifm1"
+output: "ofm"
add_executable(circlechef Driver.cpp)
target_link_libraries(circlechef circlechef_core)
target_link_libraries(circlechef safemain)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(circlechef_test Driver.test.cpp Driver.cpp)
+target_link_libraries(circlechef_test circlechef_core)
#include <iostream>
-int entry(int argc, char **argv)
+int entry_stream(std::istream &is)
{
int32_t model_version = 1;
// Read a model recipe from standard input
{
- google::protobuf::io::IstreamInputStream iis{&std::cin};
+ google::protobuf::io::IstreamInputStream iis{&is};
if (!google::protobuf::TextFormat::Parse(&iis, &model_recipe))
{
std::cerr << "ERROR: Failed to parse recipe" << std::endl;
return 0;
}
+
+int entry(int, char **)
+{
+ // forward to entry_stream
+ return entry_stream(std::cin);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+// entry function to test from Driver.cpp
+int entry_stream(std::istream &is);
+
+TEST(CircleChefDriverTest, entry_empty_NEG)
+{
+ std::istringstream empty_input("");
+
+ ASSERT_EQ(0, entry_stream(empty_input));
+}
+
+TEST(CircleChefDriverTest, entry_invalid_NEG)
+{
+ std::istringstream invalid_input("invalid: input");
+
+ ASSERT_NE(0, entry_stream(invalid_input));
+}
+
+TEST(CircleChefDriverTest, entry_invalid_version_NEG)
+{
+ std::istringstream invalid_version_input("version: 9999");
+
+ ASSERT_NE(0, entry_stream(invalid_version_input));
+}
{
arser::Arser arser;
arser.add_argument("recipe")
- .type(arser::DataType::STR)
- .help("Source recipe file path to convert");
+ .type(arser::DataType::STR)
+ .help("Source recipe file path to convert");
arser.add_argument("circle").type(arser::DataType::STR).help("Target circle file path");
try
{
arser::Arser arser;
arser.add_argument("circle")
- .type(arser::DataType::STR)
- .help("Source circle file path to convert");
+ .type(arser::DataType::STR)
+ .help("Source circle file path to convert");
arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path");
try
- mio-circle
- safemain
-- stdex
- FlatBuffers
#include "Read.h"
#include "OpPrinter.h"
+#include "MetadataPrinter.h"
#include <ostream>
auto opcodes = reader.opcodes();
auto buffers = reader.buffers();
+ auto metadata = reader.metadata();
// dump operator_codes
os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
}
os << std::endl;
+ // dump metadata
+ if (metadata != nullptr)
+ {
+ os << "metadata : B(index) name" << std::endl;
+ for (uint32_t i = 0; i < metadata->Length(); ++i)
+ {
+ const auto buff_id = metadata->Get(i)->buffer();
+ const auto metadata_name = metadata->Get(i)->name()->str();
+ os << "B(" << buff_id << ") " << metadata_name << std::endl;
+
+ const uint8_t *buff_data;
+ reader.buffer_info(buff_id, &buff_data);
+ if (auto meta_prn = MetadataPrinterRegistry::get().lookup(metadata_name))
+ {
+ meta_prn->print(buff_data, os);
+ }
+ }
+ os << std::endl;
+ }
+
for (uint32_t sg = 0; sg < num_subgraph; ++sg)
{
reader.select_subgraph(sg);
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetadataPrinter.h"
+
+#include <cassert>
+#include <string>
+#include <vector>
+
+namespace circledump
+{
+
+class SourceTablePrinter : public MetadataPrinter
+{
+public:
+ /**
+   * The source table consists of the following parts:
+ * - [ entry_number : uint32_t ]
+ * - [ id : uint32_t ][ length : uint32_t ][ data : 'length' Bytes ] * entry_number
+ */
+ virtual void print(const uint8_t *buffer, std::ostream &os) const override
+ {
+ if (buffer)
+ {
+ os << " [node_id : node_name]" << std::endl;
+ auto cur = buffer;
+ // entry number
+ const uint32_t num = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ for (uint32_t entry = 0; entry < num; entry++)
+ {
+ // id
+ const uint32_t node_id = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ // length
+ const uint32_t len = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ assert(len != 0);
+ // data
+ // non-empty 'data' has trailing '\0'. Let's exclude it.
+ std::string node_name = std::string(cur, cur + len - 1);
+ cur += len;
+
+ // print
+ os << " [" << node_id << " : " << node_name << "]" << std::endl;
+ }
+ }
+ }
+};
+
+class OpTablePrinter : public MetadataPrinter
+{
+public:
+ /**
+   * The op table consists of the following parts:
+ * - [ entry_number : uint32_t ]
+ * - [ id : uint32_t ][ length : uint32_t ][ origin_ids : length * uint32_t ] * entry_number
+ */
+ virtual void print(const uint8_t *buffer, std::ostream &os) const override
+ {
+ if (buffer)
+ {
+ os << " [node_id : origin_ids]" << std::endl;
+ auto cur = buffer;
+ // entry number
+ const uint32_t num = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ for (uint32_t entry = 0; entry < num; entry++)
+ {
+ // id
+ const uint32_t node_id = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ // length
+ const uint32_t len = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ assert(len != 0);
+ // origin_ids
+ std::vector<uint32_t> origin_ids;
+ for (uint32_t o = 0; o < len; o++)
+ {
+ origin_ids.push_back(*reinterpret_cast<const uint32_t *>(cur));
+ cur += sizeof(uint32_t);
+ }
+
+ // print
+ os << " [" << node_id << " : ";
+ uint32_t i = 0;
+ for (const auto &id : origin_ids)
+ {
+ if (i++)
+ os << ", ";
+ os << id;
+ }
+ os << "]" << std::endl;
+ }
+ }
+ }
+};
+
+MetadataPrinterRegistry::MetadataPrinterRegistry()
+{
+ _metadata_map["ONE_source_table"] = std::make_unique<SourceTablePrinter>();
+ _metadata_map["ONE_op_table"] = std::make_unique<OpTablePrinter>();
+}
+
+} // namespace circledump
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLEDUMP_METADATA_PRINTER_H__
+#define __CIRCLEDUMP_METADATA_PRINTER_H__
+
+#include <ostream>
+#include <string>
+#include <map>
+#include <memory>
+
+namespace circledump
+{
+
+class MetadataPrinter
+{
+public:
+ virtual void print(const uint8_t * /* buffer */, std::ostream &) const = 0;
+};
+
+class MetadataPrinterRegistry
+{
+public:
+ MetadataPrinterRegistry();
+
+public:
+ const MetadataPrinter *lookup(std::string op) const
+ {
+ if (_metadata_map.find(op) == _metadata_map.end())
+ return nullptr;
+
+ return _metadata_map.at(op).get();
+ }
+
+public:
+ static MetadataPrinterRegistry &get()
+ {
+ static MetadataPrinterRegistry me;
+ return me;
+ }
+
+private:
+ std::map<std::string /* metadata name */, std::unique_ptr<MetadataPrinter>> _metadata_map;
+};
+
+} // namespace circledump
+
+#endif // __CIRCLEDUMP_METADATA_PRINTER_H__
}
};
+class BidirectionalSequenceLSTMPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_BidirectionalSequenceLSTMOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "cell_clip(" << params->cell_clip() << ") ";
+ os << "proj_clip(" << params->proj_clip() << ") ";
+ os << "time_major(" << params->time_major() << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << "merge_outputs(" << params->merge_outputs() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class CastPrinter : public OpPrinter
{
public:
os << "Stride.H(" << conv_params->stride_h() << ") ";
os << "DepthMultiplier(" << conv_params->depth_multiplier() << ") ";
os << "Dilation.W(" << conv_params->dilation_w_factor() << ") ";
- os << "Dilation.H(" << conv_params->dilation_h_factor() << ")";
+ os << "Dilation.H(" << conv_params->dilation_h_factor() << ") ";
os << "Activation("
<< EnumNameActivationFunctionType(conv_params->fused_activation_function()) << ") ";
os << std::endl;
}
};
+class FakeQuantPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_FakeQuantOptions())
+ {
+ os << " ";
+ os << "Min(" << params->min() << ") ";
+ os << "Max(" << params->max() << ") ";
+ os << "NumBits(" << params->num_bits() << ") ";
+ os << std::boolalpha;
+ os << "NarrowRange(" << params->narrow_range() << ") ";
+ os << std::noboolalpha;
+ os << std::endl;
+ }
+ }
+};
+
class FullyConnectedPrinter : public OpPrinter
{
public:
_op_map[circle::BuiltinOperator_ARG_MIN] = make_unique<ArgMinPrinter>();
_op_map[circle::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[circle::BuiltinOperator_BATCH_MATMUL] = make_unique<BatchMatMulPrinter>();
+ _op_map[circle::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM] =
+ make_unique<BidirectionalSequenceLSTMPrinter>();
_op_map[circle::BuiltinOperator_CAST] = make_unique<CastPrinter>();
// There is no Option for CEIL
_op_map[circle::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
_op_map[circle::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
// There is no Option for DEQUANTIZE
_op_map[circle::BuiltinOperator_DIV] = make_unique<DivPrinter>();
+ _op_map[circle::BuiltinOperator_FAKE_QUANT] = make_unique<FakeQuantPrinter>();
// There is no Option for FLOOR
// There is no Option for FLOOR_MOD
_op_map[circle::BuiltinOperator_FULLY_CONNECTED] = make_unique<FullyConnectedPrinter>();
_op_map[circle::BuiltinOperator_L2_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[circle::BuiltinOperator_LEAKY_RELU] = make_unique<LeakyReluPrinter>();
_op_map[circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION] =
- make_unique<LocalResponseNormalizationPrinter>();
+ make_unique<LocalResponseNormalizationPrinter>();
// There is no Option for LOG
// There is no Option for LOGISTIC
// There is no Option for LOG_SOFTMAX
_op_map[circle::BuiltinOperator_RESHAPE] = make_unique<ReshapePrinter>();
_op_map[circle::BuiltinOperator_RESIZE_BILINEAR] = make_unique<ResizeBilinearPrinter>();
_op_map[circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR] =
- make_unique<ResizeNearestNeighborPrinter>();
+ make_unique<ResizeNearestNeighborPrinter>();
_op_map[circle::BuiltinOperator_REVERSE_SEQUENCE] = make_unique<ReverseSequencePrinter>();
// There is no Option for ROUND
// There is no Option for SELECT
_op_map[circle::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
// There is no Option for TOPK_V2
_op_map[circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] =
- make_unique<UnidirectionalSequenceLSTMPrinter>();
+ make_unique<UnidirectionalSequenceLSTMPrinter>();
_op_map[circle::BuiltinOperator_UNIQUE] = make_unique<UniquePrinter>();
_op_map[circle::BuiltinOperator_WHILE] = make_unique<WhilePrinter>();
_op_map[circle::BuiltinOperator_CUSTOM] = make_unique<CustomOpPrinter>();
_version = model->version();
_subgraphs = model->subgraphs();
_buffers = model->buffers();
+ _metadata = model->metadata();
auto opcodes = model->operator_codes();
for (const ::circle::OperatorCode *opcode : *opcodes)
using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
+ using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>;
public:
Reader(const circle::Model *model);
const std::vector<int32_t> &inputs() const { return _inputs; }
const std::vector<int32_t> &outputs() const { return _outputs; }
const circle::DataFormat &data_format() const { return _data_format; }
+ const CircleMetadata_t *metadata() const { return _metadata; }
uint32_t num_subgraph() const { return _subgraphs->Length(); }
const CircleBuffers_t *_buffers{nullptr};
const CircleTensors_t *_tensors{nullptr};
const CircleOperators_t *_operators{nullptr};
+ const CircleMetadata_t *_metadata{nullptr};
uint32_t _subgraph_index;
std::string _subgraph_name;
GTest_AddTest(cli_test ${TESTS})
target_link_libraries(cli_test cli)
-target_link_libraries(cli_test stdex)
#include "cli/App.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
cli::App app("test");
std::string args;
- app.insert("record", stdex::make_unique<RecordCommand>(3, args));
+ app.insert("record", std::make_unique<RecordCommand>(3, args));
const char *argv[] = {"record", "hello", "world"};
# NOTE Some coco_core PUBLIC headers include angkor headers
target_link_libraries(coco_core PUBLIC angkor)
target_link_libraries(coco_core PRIVATE pepper_assert)
-target_link_libraries(coco_core PRIVATE stdex)
# Let's apply nncc common compile options
# NOTE This will enable strict compilation (warnings as error).
# Please refer to top-level CMakeLists.txt for details
GTest_AddTest(coco_core_test ${TESTS})
target_link_libraries(coco_core_test coco_core)
-target_link_libraries(coco_core_test stdex)
{
public:
FeatureShape(uint32_t depth, uint32_t height, uint32_t width)
- : Shape{depth, height, width}, _batch{1}
+ : Shape{depth, height, width}, _batch{1}
{
// DO NOTHING
}
FeatureShape(uint32_t batch, uint32_t depth, uint32_t height, uint32_t width)
- : Shape{depth, height, width}, _batch{batch}
+ : Shape{depth, height, width}, _batch{batch}
{
// DO NOTHING
}
/**
 * @brief Return the associated instruction if it exists.
- */
+ */
struct Locatable
{
virtual ~Locatable() = default;
const Sqrt *asSqrt(void) const override { return this; }
};
-} // namesapce coco
+} // namespace coco
#endif // __COCO_IR_OPS_H__
public:
Padding2D(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
- : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
{
// DO NOTHING
}
struct Object
{
};
-}
+} // namespace
TEST(ADT_PTR_LIST, ctor)
{
void free(Object *o) { release(o); }
};
-}
+} // namespace
TEST(ADT_PTR_MANAGER, usecase)
{
#include "coco/IR/BagManager.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace coco
{
Bag *BagManager::create(uint32_t size)
{
- auto bag = stdex::make_unique<Bag>(size);
+ auto bag = std::make_unique<Bag>(size);
modulize(bag.get());
return take(std::move(bag));
}
#include "coco/IR/BlockManager.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace coco
Block *BlockManager::create(void)
{
- auto blk = stdex::make_unique<Block>();
+ auto blk = std::make_unique<Block>();
modulize(blk.get());
return take(std::move(blk));
}
#include <vector>
#include <memory>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "coco/IR/FeatureObject.h"
-#include <stdex/Memory.h>
+#include <memory>
#include "Producer.mock.h"
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "coco/IR/InputManager.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace coco
{
Input *InputManager::create(const nncc::core::ADT::tensor::Shape &shape)
{
- auto input = stdex::make_unique<Input>(shape);
+ auto input = std::make_unique<Input>(shape);
modulize(input.get());
return take(std::move(input));
}
#include "coco/IR/Module.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "coco/IR/FeatureObject.h"
#include "coco/IR/KernelObject.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
-using stdex::make_unique;
+using std::make_unique;
namespace coco
{
#include "coco/IR/OpManager.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <queue>
#include <set>
-using stdex::make_unique;
+using std::make_unique;
namespace coco
{
#include <vector>
#include <memory>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
/**
* Section: Add Op
#include "coco/IR/OutputManager.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace coco
{
Output *OutputManager::create(const nncc::core::ADT::tensor::Shape &shape)
{
- auto output = stdex::make_unique<Output>(shape);
+ auto output = std::make_unique<Output>(shape);
modulize(output.get());
return take(std::move(output));
}
#include "coco/IR/Part.h"
#include "coco/IR/Op.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "Consumer.mock.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
add_library(coco_generic SHARED ${SOURCES})
target_include_directories(coco_generic PUBLIC include)
target_link_libraries(coco_generic PUBLIC coco_core)
-target_link_libraries(coco_generic PRIVATE stdex)
target_link_libraries(coco_generic PRIVATE nncc_common)
if(NOT ENABLE_TEST)
GTest_AddTest(coco_generic_test ${TESTS})
target_link_libraries(coco_generic_test coco_generic)
-# stdex is a PRIVATE dependency of coco_generic, and thus is not linked to coco_generic_test
-# even though coco_generic_test is linked to coco_generic
-target_link_libraries(coco_generic_test stdex)
#include <nncc/core/ADT/kernel/NCHWLayout.h>
#include <nncc/core/ADT/kernel/Overlay.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
using namespace nncc::core::ADT;
-using stdex::make_unique;
+using std::make_unique;
namespace
{
private:
std::map<const coco::Bag *, std::unique_ptr<std::vector<uint8_t>>> _data;
};
-}
+} // namespace
namespace
{
if(DEFINED RULE_SOURCE_PATH)
# Copy .rule
add_custom_command(OUTPUT ${RULE_BINARY_PATH}
- COMMAND ${CMAKE_COMMAND} -E copy "${RULE_SOURCE_PATH}" "${RULE_BINARY_PATH}"
- DEPENDS ${RULE_SOURCE_PATH}
- COMMENT "Generate ${RULE_FILE}"
+ COMMAND ${CMAKE_COMMAND} -E copy "${RULE_SOURCE_PATH}" "${RULE_BINARY_PATH}"
+ DEPENDS ${RULE_SOURCE_PATH}
+ COMMENT "Generate ${RULE_FILE}"
)
list(APPEND TEST_DEPS ${RULE_BINARY_PATH})
endif()
list(APPEND TEST_DEPS ${TFLITE_OUTPUT_PATH})
if(NOT DEFINED NO_CIRCLIZE_${RECIPE})
- # Generate .circle
- add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH} ${CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH}
- COMMENT "Generate ${CIRCLE_FILE}"
- )
- set(MODEL_FORMAT "circle")
- list(APPEND TEST_DEPS ${CIRCLE_OUTPUT_PATH})
+ # Generate .circle
+ add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH} ${CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH}
+ COMMENT "Generate ${CIRCLE_FILE}"
+ )
+ set(MODEL_FORMAT "circle")
+ list(APPEND TEST_DEPS ${CIRCLE_OUTPUT_PATH})
endif()
else()
# Generate .circle
add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH} ${CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH}
- COMMENT "Generate ${CIRCLE_FILE}"
+ COMMAND $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH} ${CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH}
+ COMMENT "Generate ${CIRCLE_FILE}"
)
list(APPEND TEST_DEPS ${CIRCLE_OUTPUT_PATH})
endif()
if(NOT DEFINED NO_OPTIMIZE_${RECIPE})
# Generate optimized .circle
add_custom_command(OUTPUT ${OPT_CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circle2circle> --all ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:circle2circle> --O1 ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_OUTPUT_PATH}
COMMENT "Generate ${OPT_CIRCLE_FILE}"
)
set(MODEL_FILE "${RECIPE}${OPT_FORMAT}.${MODEL_FORMAT}")
set(MODEL_PATH "${CMAKE_CURRENT_BINARY_DIR}/${MODEL_FILE}")
set(NNPKG_FILE "${RECIPE}${OPT_FORMAT}")
- set(NNPKG_PATH "${CMAKE_CURRENT_BINARY_DIR}/${NNPKG_FILE}")
+ set(NNPKG_DIR "${CMAKE_CURRENT_BINARY_DIR}/${NNPKG_FILE}")
+ set(NNPKG_MODEL "${NNPKG_DIR}/${MODEL_FILE}")
+
+ # Generate nnpackage directory
+ add_custom_command(OUTPUT ${NNPKG_DIR}
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${NNPKG_DIR}
+ DEPENDS ${MODEL_PATH}
+ COMMENT "Generate ${RECIPE} nnpackage directory"
+ )
+ list(APPEND TEST_DEPS ${NNPKG_DIR})
- add_custom_command(OUTPUT ${NNPKG_PATH}
+ add_custom_command(OUTPUT ${NNPKG_MODEL}
COMMAND ${MODEL2NNPKG} ${MODEL_PATH}
- DEPENDS ${MODEL2NNPKG} ${MODEL_PATH}
+ DEPENDS ${MODEL2NNPKG} ${MODEL_PATH} ${NNPKG_DIR}
COMMENT "Generate ${RECIPE} nnpackage"
)
- list(APPEND TEST_DEPS ${NNPKG_PATH})
-
- set(INPUT_HDF5_FILE "${RECIPE}${OPT_FORMAT}.input.h5")
- set(INPUT_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_HDF5_FILE}")
-
- set(EXPECTED_HDF5_FILE "${RECIPE}${OPT_FORMAT}.expected.h5")
- set(EXPECTED_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${EXPECTED_HDF5_FILE}")
+ list(APPEND TEST_DEPS ${NNPKG_MODEL})
if(NOT DEFINED NO_TCGEN_${RECIPE})
- # Generate input.h5, expected.h5
- add_custom_command(OUTPUT ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH}
- COMMAND $<TARGET_FILE:testDataGenerator> ${MODEL_FILE}
- DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE}
- COMMENT "Generate ${INPUT_BIN_PATH} and ${EXPECTED_BIN_PATH}"
- )
-
# Generate test directory
- set(TC_DIRECTORY "${NNPKG_PATH}/metadata/tc")
+ set(TC_DIRECTORY "${NNPKG_DIR}/metadata/tc")
add_custom_command(OUTPUT ${TC_DIRECTORY}
COMMAND ${CMAKE_COMMAND} -E make_directory ${TC_DIRECTORY}
- DEPENDS ${NNPKG_PATH}
+ DEPENDS ${NNPKG_DIR}
COMMENT "Generate ${RECIPE} nnpackage test directory"
)
+ list(APPEND TEST_DEPS ${TC_DIRECTORY})
- # Move input hdf5 file to test directory
- set(INPUT_NNPKG_PATH "${TC_DIRECTORY}/input.h5")
- add_custom_command(OUTPUT ${INPUT_NNPKG_PATH}
- COMMAND ${CMAKE_COMMAND} -E rename ${INPUT_BIN_PATH} ${INPUT_NNPKG_PATH}
- DEPENDS ${INPUT_BIN_PATH} ${TC_DIRECTORY}
- COMMENT "Move ${INPUT_HDF5_FILE} to nnpackage"
- )
-
- # Move expected hdf5 file to test directory
- set(EXPECTED_NNPKG_PATH "${TC_DIRECTORY}/expected.h5")
- add_custom_command(OUTPUT ${EXPECTED_NNPKG_PATH}
- COMMAND ${CMAKE_COMMAND} -E rename ${EXPECTED_BIN_PATH} ${EXPECTED_NNPKG_PATH}
- DEPENDS ${EXPECTED_BIN_PATH} ${TC_DIRECTORY}
- COMMENT "Move ${EXPECTED_HDF5_FILE} to nnpackage"
+ # Generate input.h5, expected.h5
+ set(INPUT_HDF5_FILE "${TC_DIRECTORY}/input.h5")
+ set(EXPECTED_HDF5_FILE "${TC_DIRECTORY}/expected.h5")
+ add_custom_command(OUTPUT ${INPUT_HDF5_FILE} ${EXPECTED_HDF5_FILE}
+ COMMAND $<TARGET_FILE:testDataGenerator> --input_data ${INPUT_HDF5_FILE} --expected_data ${EXPECTED_HDF5_FILE} ${MODEL_FILE}
+ DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE} ${TC_DIRECTORY}
+ COMMENT "Generate ${INPUT_HDF5_FILE} and ${EXPECTED_HDF5_FILE}"
)
- list(APPEND TEST_DEPS ${TC_DIRECTORY} ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH}
- ${INPUT_NNPKG_PATH} ${EXPECTED_NNPKG_PATH})
+ list(APPEND TEST_DEPS ${INPUT_HDF5_FILE} ${EXPECTED_HDF5_FILE})
endif()
endforeach()
tcgenerate(BatchMatMulV2_000)
tcgenerate(BatchMatMulV2_001)
tcgenerate(BatchToSpaceND_000)
+tcgenerate(BroadcastTo_000) # luci-interpreter doesn't support custom operator
tcgenerate(Cast_000)
tcgenerate(Cast_001)
tcgenerate(Ceil_000)
tcgenerate(ExpandDims_001)
tcgenerate(ExpandDims_002)
tcgenerate(ExpandDims_003)
+tcgenerate(ExpandDims_004)
+tcgenerate(FakeQuant_000) # runtime and luci-interpreter don't support it yet
tcgenerate(Fill_000)
tcgenerate(Fill_001)
tcgenerate(FloorMod_000)
tcgenerate(MaxPoolWithArgMax_000)
tcgenerate(MaxPoolWithArgMax_001)
tcgenerate(MaxPoolWithArgMax_002)
+tcgenerate(Mean_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(Mean_dynamic_001) # TestDataGenerator does not support unknown dimension
+tcgenerate(Mean_U8_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(NonMaxSuppressionV4_000)
tcgenerate(NonMaxSuppressionV4_001)
tcgenerate(NonMaxSuppressionV5_000)
tcgenerate(MirrorPad_000)
tcgenerate(Mul_U8_000)
tcgenerate(Neg_000)
+tcgenerate(Net_BroadcastTo_AddV2_001) # luci-interpreter doesn't support custom operator
tcgenerate(Net_Dangle_001)
-tcgenerate(Net_InstanceNorm_001)
-tcgenerate(Net_InstanceNorm_002)
-tcgenerate(Net_InstanceNorm_003)
tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim
tcgenerate(OneHot_000)
tcgenerate(OneHot_001)
tcgenerate(ReduceAny_001)
tcgenerate(ReduceAny_002)
tcgenerate(ReduceAny_003)
-tcgenerate(ReduceAny_dynamic_000)
-tcgenerate(ReduceAny_dynamic_001)
-tcgenerate(ReduceAny_dynamic_002)
-tcgenerate(ReduceAny_dynamic_003)
+tcgenerate(ReduceAny_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceAny_dynamic_001) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceAny_dynamic_002) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceAny_dynamic_003) # TestDataGenerator does not support unknown dimension
tcgenerate(ReduceMax_000)
-tcgenerate(ReduceMax_dynamic_000)
+tcgenerate(ReduceMax_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(ReduceMin_000)
-tcgenerate(ReduceMin_dynamic_000)
+tcgenerate(ReduceMin_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(ReduceProd_000)
tcgenerate(ReduceProd_001)
tcgenerate(ReduceProd_002)
tcgenerate(ReduceProd_003)
-tcgenerate(ReduceProd_dynamic_000)
-tcgenerate(ReduceProd_dynamic_001)
-tcgenerate(ReduceProd_dynamic_002)
-tcgenerate(ReduceProd_dynamic_003)
+tcgenerate(ReduceProd_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceProd_dynamic_001) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceProd_dynamic_002) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceProd_dynamic_003) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReLU_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReLU6_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(ReLUN1To1_000)
-tcgenerate(ReLUN1To1_dynamic_000)
+tcgenerate(ReLUN1To1_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(Reshape_003) # luci-interpreter doesn't support reshape without built-in option
tcgenerate(ReverseSequence_000)
tcgenerate(ReverseV2_000)
tcgenerate(SelectV2_002)
tcgenerate(Shape_000)
tcgenerate(Sin_000)
+tcgenerate(Slice_001) # luci-interpreter doesn't support Slice with -1
tcgenerate(SpaceToBatchND_000)
tcgenerate(SpaceToBatchND_001)
tcgenerate(SpaceToBatchND_002)
tcgenerate(SparseToDense_000)
tcgenerate(SplitV_000)
tcgenerate(Square_000)
-tcgenerate(SquaredDifference_000)
tcgenerate(Sum_000)
tcgenerate(Sum_001)
-tcgenerate(Sum_dynamic_000)
-tcgenerate(Sum_dynamic_001)
+tcgenerate(Sum_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(Sum_dynamic_001) # TestDataGenerator does not support unknown dimension
tcgenerate(Tile_000)
tcgenerate(Tile_U8_000)
tcgenerate(TopKV2_000)
uint32_t element_num(std::vector<hsize_t> &vec)
{
return static_cast<uint32_t>(
- std::accumulate(std::begin(vec), std::end(vec), 1, std::multiplies<uint32_t>()));
+ std::accumulate(std::begin(vec), std::end(vec), 1, std::multiplies<uint32_t>()));
}
H5::PredType hdf5_dtype_cast(const loco::DataType loco_dtype)
{
arser::Arser arser;
arser.add_argument("circle").type(arser::DataType::STR).help("Circle file you want to test");
+ arser.add_argument("--input_data")
+ .required(true)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Path to generate input data h5 file");
+ arser.add_argument("--expected_data")
+ .required(true)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Path to generate expected data h5 file");
arser.add_argument("--fixed_seed")
- .required(false)
- .nargs(0)
- .help("Put a fixed seed into the random number generator");
+ .required(false)
+ .nargs(0)
+ .help("Put a fixed seed into the random number generator");
try
{
}
std::string circle_file = arser.get<std::string>("circle");
- size_t last_dot_index = circle_file.find_last_of(".");
- std::string prefix = circle_file.substr(0, last_dot_index);
// load circle file
foder::FileLoader file_loader{circle_file};
 * ㄴDATA ...
*/
// create random data and dump into hdf5 file
- H5::H5File input_file{prefix + ".input.h5", H5F_ACC_TRUNC};
+ H5::H5File input_file{arser.get<std::string>("--input_data"), H5F_ACC_TRUNC};
std::unique_ptr<H5::Group> input_name_group =
- std::make_unique<H5::Group>(input_file.createGroup("name"));
+ std::make_unique<H5::Group>(input_file.createGroup("name"));
std::unique_ptr<H5::Group> input_value_group =
- std::make_unique<H5::Group>(input_file.createGroup("value"));
+ std::make_unique<H5::Group>(input_file.createGroup("value"));
- H5::H5File output_file{prefix + ".expected.h5", H5F_ACC_TRUNC};
+ H5::H5File output_file{arser.get<std::string>("--expected_data"), H5F_ACC_TRUNC};
std::unique_ptr<H5::Group> output_name_group =
- std::make_unique<H5::Group>(output_file.createGroup("name"));
+ std::make_unique<H5::Group>(output_file.createGroup("name"));
std::unique_ptr<H5::Group> output_value_group =
- std::make_unique<H5::Group>(output_file.createGroup("value"));
+ std::make_unique<H5::Group>(output_file.createGroup("value"));
std::random_device rd; // used to obtain a seed for the random number engine
uint32_t input_index = 0;
auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
auto dtype = hdf5_dtype_cast(input_node->dtype());
auto dataset = std::make_unique<H5::DataSet>(
- input_file.createDataSet("value/" + std::to_string(input_index), dtype, *dataspace));
+ input_file.createDataSet("value/" + std::to_string(input_index), dtype, *dataspace));
auto data_size = ::element_num(dims);
auto dtype_size = loco::size(input_node->dtype());
auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
auto dtype = hdf5_dtype_cast(output_node->dtype());
auto dataset = std::make_unique<H5::DataSet>(
- output_file.createDataSet("value/" + std::to_string(output_index), dtype, *dataspace));
+ output_file.createDataSet("value/" + std::to_string(output_index), dtype, *dataspace));
uint32_t tensor_bytesize = loco::size(output_node->dtype());
tensor_bytesize *= ::element_num(dims);
--- /dev/null
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(crew STATIC ${SOURCES})
+target_include_directories(crew PRIVATE src)
+target_include_directories(crew PUBLIC include)
+target_link_libraries(crew PRIVATE foder)
+target_link_libraries(crew PRIVATE nncc_common)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(crew_test ${TESTS})
+target_include_directories(crew_test PRIVATE src)
+target_link_libraries(crew_test nncc_common)
+target_link_libraries(crew_test crew)
--- /dev/null
+# crew
+
+_crew_ is the circle partitioning Configuration REader and Writer library.
+
+### Supported formats
+
+Currently, _crew_ supports the following formats and functionalities:
+- INI read
+- INI write
+- JSON write
+
+_crew_ supports a limited portion of the JSON and INI formats, just enough to
+access circle partition configuration files.
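+
+### Example
+
+A minimal usage sketch of the `crew::PConfig` API declared in `crew/PConfig.h`
+(the file paths below are only examples):
+
+```cpp
+#include "crew/PConfig.h"
+
+#include <fstream>
+#include <iostream>
+
+int main(void)
+{
+  crew::PConfig config;
+
+  // read a partition configuration from an INI file
+  if (!crew::read_ini("partition.ini", config))
+  {
+    std::cerr << "Failed to read partition.ini" << std::endl;
+    return 1;
+  }
+
+  // write the same configuration back as INI to stdout and as JSON to a file
+  crew::write_ini(std::cout, config);
+
+  std::ofstream json_file("partition.json");
+  crew::write_json(json_file, config);
+
+  return 0;
+}
+```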
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_H__
+#define __CREW_PCONFIG_H__
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace crew
+{
+
+struct Part
+{
+ std::string model_file;
+ std::vector<std::string> inputs;
+ std::vector<std::string> outputs;
+};
+
+using Parts = std::vector<Part>;
+using Source = Part;
+
+struct PConfig
+{
+ Source source;
+ Parts parts;
+};
+
+/**
+ * @brief Read config from an INI file, return false if failed
+ */
+bool read_ini(const std::string &path, PConfig &config);
+
+/**
+ * @brief Write config in INI format to the stream, return false if failed
+ */
+bool write_ini(std::ostream &os, const PConfig &config);
+
+/**
+ * @brief Write config in JSON format to the stream, return false if failed
+ */
+bool write_json(std::ostream &os, const PConfig &config);
+
+} // namespace crew
+
+#endif // __CREW_PCONFIG_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_INI_H__
+#define __CREW_PCONFIG_INI_H__
+
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace crew
+{
+
+using KeyValues = std::unordered_map<std::string, std::string>;
+
+struct Section
+{
+ std::string name;
+ KeyValues items;
+};
+
+using Sections = std::vector<Section>;
+
+/**
+ * @brief Read Config INI from a memory buffer of given length and return Sections
+ */
+Sections read_ini(const char *data, size_t length);
+/**
+ * @brief Reads Config INI from file and return Sections
+ */
+Sections read_ini(const std::string &path);
+
+/**
+ * @brief Write Config INI with Sections to ostream
+ */
+void write_ini(std::ostream &os, const Sections &sections);
+/**
+ * @brief Write Config INI with Sections to file, throw if failed
+ */
+void write_ini(const std::string &path, const Sections &sections);
+
+/**
+ * @brief Find a section with name, empty section if not found
+ */
+Section find(const Sections &sections, const std::string &name);
+
+/**
+ * @brief Find a key-value pair from key and return value, empty string if not found
+ */
+std::string find(const Section &section, const std::string &key);
+
+} // namespace crew
+
+#endif // __CREW_PCONFIG_INI_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_INI_DUMP_H__
+#define __CREW_PCONFIG_INI_DUMP_H__
+
+#include "PConfigIni.h"
+
+#include <iostream>
+
+namespace crew
+{
+
+void dump(std::ostream &os, const Sections &sections);
+
+} // namespace crew
+
+std::ostream &operator<<(std::ostream &os, const crew::Sections &sections);
+
+#endif // __CREW_PCONFIG_INI_DUMP_H__
--- /dev/null
+require("foder")
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfig.h"
+#include "crew/PConfigIni.h"
+
+#include "PConfigJson.h"
+
+#include <utility>
+
+namespace
+{
+
+bool read_part(const crew::Section &section, crew::Part &part)
+{
+  // read the model file of this part
+  part.model_file = crew::find(section, "file");
+ if (part.model_file.empty())
+ return false;
+
+  // read inputs of this part
+ for (int32_t i = 1;; ++i)
+ {
+ std::string item = "i" + std::to_string(i);
+ std::string input = crew::find(section, item);
+ if (input.empty())
+ break;
+
+ part.inputs.push_back(input);
+ }
+  // read outputs of this part
+ for (int32_t i = 1;; ++i)
+ {
+ std::string item = "o" + std::to_string(i);
+ std::string output = crew::find(section, item);
+ if (output.empty())
+ break;
+
+ part.outputs.push_back(output);
+ }
+ return true;
+}
+
+} // namespace
+
+namespace
+{
+
+void write_part(crew::JsonExport &je, const crew::Part &part)
+{
+ std::vector<std::string> graph_inputs;
+ std::vector<std::string> graph_outputs;
+
+ for (auto &input : part.inputs)
+ {
+ graph_inputs.push_back(input);
+ }
+ for (auto &output : part.outputs)
+ {
+ graph_outputs.push_back(output);
+ }
+
+ je.key_val("file", part.model_file.c_str(), true);
+ je.key_val("inputs", graph_inputs, true);
+ je.key_val("outputs", graph_outputs, false);
+}
+
+void write_parts(crew::JsonExport &je, const crew::Parts &parts)
+{
+ uint32_t idx = 1;
+ uint32_t size = parts.size();
+ for (auto &part : parts)
+ {
+ je.open_brace();
+ write_part(je, part);
+ je.close_brace(idx < size);
+ idx++;
+ }
+}
+
+} // namespace
+
+namespace
+{
+
+void part_to_section_io(const crew::Part &part, crew::Section &section)
+{
+ uint32_t idx = 1;
+ for (auto &input : part.inputs)
+ {
+ std::string key = "i" + std::to_string(idx);
+ section.items.emplace(key, input);
+ idx++;
+ }
+ idx = 1;
+ for (auto &output : part.outputs)
+ {
+ std::string key = "o" + std::to_string(idx);
+ section.items.emplace(key, output);
+ idx++;
+ }
+}
+
+} // namespace
+
+namespace crew
+{
+
+bool read_ini(const std::string &path, PConfig &pconfig)
+{
+ auto sections = crew::read_ini(path);
+
+ auto section_source = crew::find(sections, "source");
+ auto section_models = crew::find(sections, "models");
+ if (section_source.name != "source" || section_models.name != "models")
+ {
+ return false;
+ }
+
+ if (!read_part(section_source, pconfig.source))
+ {
+ return false;
+ }
+
+ // get models list
+ std::vector<std::string> models;
+ for (int32_t i = 1;; ++i)
+ {
+ std::string item = "m" + std::to_string(i);
+ std::string model = crew::find(section_models, item);
+ if (model.empty())
+ break;
+
+ models.push_back(model);
+ }
+
+ for (auto &model : models)
+ {
+ auto section_model = crew::find(sections, model);
+
+ Part part;
+ if (!read_part(section_model, part))
+ {
+ return false;
+ }
+ pconfig.parts.push_back(part);
+ }
+
+ return true;
+}
+
+bool write_ini(std::ostream &os, const PConfig &pconfig)
+{
+ crew::Sections sections;
+
+ // make [source]
+ crew::Section section_source;
+ section_source.name = "source";
+ section_source.items["file"] = pconfig.source.model_file;
+ part_to_section_io(pconfig.source, section_source);
+ sections.push_back(section_source);
+
+ // make [models]
+ crew::Section section_models;
+ section_models.name = "models";
+ uint32_t idx = 1;
+ for (auto &part : pconfig.parts)
+ {
+ std::string key = "m" + std::to_string(idx);
+ section_models.items[key] = part.model_file;
+ idx++;
+ }
+ sections.push_back(section_models);
+
+ for (auto &part : pconfig.parts)
+ {
+ // make circle model section
+ crew::Section section_model;
+ section_model.name = part.model_file;
+ section_model.items["file"] = part.model_file;
+ part_to_section_io(part, section_model);
+ sections.push_back(section_model);
+ }
+
+ write_ini(os, sections);
+
+ return true;
+}
+
+bool write_json(std::ostream &os, const PConfig &pconfig)
+{
+ crew::JsonExport je(os);
+
+ je.open_brace();
+ {
+ je.open_brace("source");
+ write_part(je, pconfig.source);
+ je.close_brace(true);
+ }
+ {
+ je.open_bracket("parts");
+ write_parts(je, pconfig.parts);
+ je.close_bracket(false);
+ }
+ je.close_brace(false);
+
+ return true;
+}
+
+} // namespace crew
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIni.h"
+#include "crew/PConfigIniDump.h"
+
+#include <foder/FileLoader.h>
+
+#include <cassert>
+#include <cstring>
+#include <fstream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace crew
+{
+
+Sections read_ini(const char *data, size_t length)
+{
+ assert(data != nullptr);
+ assert(length > 0);
+
+  // NOTE allocate with the final size (not just reserve) so that writing
+  //      through data() stays within the vector's actual elements
+  auto buffer = std::vector<char>(length + 1);
+  char *pbuffer = buffer.data();
+  memcpy(pbuffer, data, length);
+  // add null at end to be sure
+  *(pbuffer + length) = 0;
+
+ Sections sections;
+ Section section;
+
+ std::string string_line;
+
+ const char *delim = "\r\n";
+ const char *one_line = std::strtok(pbuffer, delim);
+ while (one_line != nullptr)
+ {
+ if (*one_line == '[')
+ {
+ if (!section.name.empty())
+ {
+ sections.push_back(section);
+ }
+ section.name.clear();
+ section.items.clear();
+
+ string_line = one_line + 1;
+ auto pos = string_line.find(']');
+ assert(pos != std::string::npos);
+ if (pos != std::string::npos)
+ {
+ section.name = string_line.substr(0, pos);
+ }
+ }
+ else if (*one_line == '#' || *one_line == ';')
+ {
+ // Comment line, do nothing
+ }
+    else if (*one_line) // string length is not 0
+ {
+ if (section.name.empty())
+ throw std::runtime_error("Invalid INI file");
+
+ string_line = one_line;
+ auto pos = string_line.find('=');
+ assert(pos != std::string::npos);
+ if (pos != std::string::npos)
+ {
+ auto key = string_line.substr(0, pos);
+ auto val = string_line.substr(pos + 1);
+ section.items.emplace(key, val);
+ }
+ }
+
+ one_line = std::strtok(nullptr, delim);
+ }
+ if (!section.name.empty())
+ {
+ sections.push_back(section);
+ }
+
+ return sections;
+}
+
+Sections read_ini(const std::string &path)
+{
+ foder::FileLoader file_loader{path};
+ // load will throw if error while opening
+ auto ini_data = file_loader.load();
+
+ return read_ini(ini_data.data(), ini_data.size());
+}
+
+void write_ini(std::ostream &os, const Sections &sections)
+{
+ std::stringstream ss;
+
+ ss << sections;
+
+ std::string strss = ss.str();
+
+ os.write(strss.c_str(), strss.length());
+}
+
+void write_ini(const std::string &filepath, const Sections &sections)
+{
+ std::ofstream fs(filepath.c_str(), std::ofstream::binary | std::ofstream::trunc);
+ if (not fs.good())
+ {
+ std::string msg = "Failed to create file: " + filepath;
+ throw std::runtime_error(msg);
+ }
+
+ write_ini(fs, sections);
+
+ fs.close();
+}
+
+Section find(const Sections &sections, const std::string &name)
+{
+  for (auto &section : sections)
+ {
+ if (section.name == name)
+ return section;
+ }
+ Section not_found;
+ return not_found;
+}
+
+std::string find(const Section &section, const std::string &key)
+{
+ for (auto &item : section.items)
+ {
+ if (item.first == key)
+ return item.second;
+ }
+ return "";
+}
+
+} // namespace crew
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIni.h"
+#include "crew/PConfigIniDump.h"
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+#include <stdexcept>
+
+TEST(ConfigIniTest, read_ini_non_exist_file)
+{
+ EXPECT_THROW(crew::read_ini("/hello/world/not_a_file"), std::runtime_error);
+}
+
+TEST(ConfigIniTest, read_ini_simple)
+{
+ std::stringstream ss;
+
+ ss << "[hello]\nkey=world\n";
+
+ auto str = ss.str();
+ auto sections = crew::read_ini(str.c_str(), str.length());
+ ASSERT_EQ(1UL, sections.size());
+
+ auto its = sections.begin();
+ ASSERT_NE(sections.end(), its);
+ EXPECT_TRUE("hello" == its->name);
+ ASSERT_EQ(1UL, its->items.size());
+
+ auto it = its->items.begin();
+ ASSERT_NE(its->items.end(), it);
+ EXPECT_TRUE("key" == it->first);
+ EXPECT_TRUE("world" == it->second);
+}
+
+TEST(ConfigIniTest, read_ini_simple_NEG)
+{
+ std::stringstream ss;
+
+ ss << "key=value\nhello=world\n";
+
+ auto str = ss.str();
+
+ EXPECT_THROW(crew::read_ini(str.c_str(), str.length()), std::runtime_error);
+}
+
+TEST(ConfigIniTest, read_ini_comment)
+{
+ std::stringstream ss;
+
+ ss << "[hello]\n;comment=skip\n#comment=skip\nkey=world\n";
+
+ auto str = ss.str();
+ auto sections = crew::read_ini(str.c_str(), str.length());
+ ASSERT_EQ(1UL, sections.size());
+
+ auto its = sections.begin();
+ ASSERT_NE(sections.end(), its);
+ EXPECT_TRUE("hello" == its->name);
+ ASSERT_EQ(1UL, its->items.size());
+
+ auto it = its->items.begin();
+ ASSERT_NE(its->items.end(), it);
+ EXPECT_TRUE("key" == it->first);
+ EXPECT_TRUE("world" == it->second);
+}
+
+TEST(ConfigIniTest, write_ini_file_error_NEG)
+{
+ crew::Sections sections;
+ EXPECT_THROW(crew::write_ini("/abc/def/cannot_access", sections), std::runtime_error);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIniDump.h"
+
+namespace crew
+{
+
+/**
+ * @brief Dump content of sections
+ */
+void dump(std::ostream &os, const Sections &sections)
+{
+  for (auto &section : sections)
+ {
+ os << "[" << section.name << "]" << std::endl;
+ for (auto &item : section.items)
+ {
+ os << item.first << "=" << item.second << std::endl;
+ }
+ os << std::endl;
+ }
+}
+
+} // namespace crew
+
+std::ostream &operator<<(std::ostream &os, const crew::Sections &sections)
+{
+ crew::dump(os, sections);
+ return os;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIni.h"
+#include "crew/PConfigIniDump.h"
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+#include <stdexcept>
+
+TEST(ConfigIniDumpTest, dump_sections)
+{
+ crew::Sections sections;
+ crew::Section section;
+
+ section.name = "hello";
+ section.items["key"] = "value";
+
+ sections.push_back(section);
+
+ std::stringstream ss;
+
+ ss << sections;
+
+  // there's an extra \n at the end of each section
+ ASSERT_TRUE(ss.str() == "[hello]\nkey=value\n\n");
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PConfigJson.h"
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+const char _CLF = '\n'; // Control Line Feed
+const char _DQU = '\"'; // Double QUotation
+
+} // namespace
+
+namespace crew
+{
+
+void JsonExport::indent(void)
+{
+ for (uint32_t i = 0; i < _indent; ++i)
+ _os << " ";
+}
+
+void JsonExport::open_brace(void)
+{
+ indent();
+
+ _os << "{" << _CLF;
+ _indent++;
+}
+
+void JsonExport::open_brace(const std::string &key)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : {" << _CLF;
+ _indent++;
+}
+
+void JsonExport::open_bracket(const std::string &key)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : [" << _CLF;
+ _indent++;
+}
+
+void JsonExport::close_bracket(bool cont)
+{
+ _indent--;
+ indent();
+
+ _os << "]";
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+void JsonExport::close_brace(bool cont)
+{
+ _indent--;
+ indent();
+
+ _os << "}";
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+void JsonExport::key_val(const std::string &key, const std::string &value, bool cont)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : " << _DQU << value << _DQU;
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+void JsonExport::key_val(const std::string &key, const std::vector<std::string> &l, bool cont)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : [ ";
+ bool comma = false;
+ for (auto &v : l)
+ {
+ if (comma)
+ _os << ", ";
+ else
+ comma = true;
+ _os << _DQU << v << _DQU;
+ }
+ _os << " ]";
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+} // namespace crew
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_JSON_H__
+#define __CREW_PCONFIG_JSON_H__
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace crew
+{
+
+class JsonExport
+{
+public:
+ JsonExport(std::ostream &os) : _os(os) {}
+
+private:
+ void indent(void);
+
+public:
+ void open_brace(void);
+ void open_brace(const std::string &key);
+ void open_bracket(const std::string &key);
+ void close_bracket(bool cont);
+ void close_brace(bool cont);
+ void key_val(const std::string &key, const std::string &value, bool cont);
+ void key_val(const std::string &key, const std::vector<std::string> &l, bool cont);
+
+private:
+ std::ostream &_os;
+ uint32_t _indent = 0;
+};
+
+} // namespace crew
+
+#endif // __CREW_PCONFIG_JSON_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PConfigJson.h"
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+
+TEST(ConfigJsonTest, empty)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+
+ je.open_brace();
+ je.close_brace(true);
+
+ ASSERT_TRUE(ss.str() == "{\n},\n");
+}
+
+TEST(ConfigJsonTest, keyvalue)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+
+ je.open_brace("hello");
+ je.key_val("key", "value", true);
+ je.close_brace(true);
+
+ ASSERT_TRUE(ss.str() == "\"hello\" : {\n \"key\" : \"value\",\n},\n");
+}
+
+TEST(ConfigJsonTest, keyvaluearray)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+ std::vector<std::string> vs = {"1", "2"};
+
+ je.open_brace("hello");
+ je.key_val("key", vs, true);
+ je.close_brace(true);
+
+ ASSERT_TRUE(ss.str() == "\"hello\" : {\n \"key\" : [ \"1\", \"2\" ],\n},\n");
+}
+
+TEST(ConfigJsonTest, bracket)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+
+ je.open_bracket("hello");
+ je.close_bracket(true);
+
+ ASSERT_TRUE(ss.str() == "\"hello\" : [\n],\n");
+}
return fd;
}
-} // namespace make_temp
+} // namespace
TEST(FildesTest, default_constructor)
{
target_link_libraries(enco-cli enco_intf_cmdline)
target_link_libraries(enco-cli enco_intf_frontend)
target_link_libraries(enco-cli enco_core)
-target_link_libraries(enco-cli stdex)
target_link_libraries(enco-cli dl)
# Let's use project-wide compile options
target_link_libraries(enco-cli nncc_common)
} // namespace
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <iostream>
std::map<std::string, std::function<void(const std::string &arg)>> argparse;
argparse["--frontend"] = [&](const std::string &path) {
- frontend_zone = stdex::make_unique<FrontendZone>(path);
+ frontend_zone = std::make_unique<FrontendZone>(path);
};
argparse["--frontend-arg"] = [&](const std::string &arg) { frontend_zone->append(arg); };
# These libraries are linked for internal use, and thus does not appear in public headers.
target_link_libraries(enco_core PRIVATE pp)
target_link_libraries(enco_core PRIVATE morph)
-target_link_libraries(enco_core PRIVATE stdex)
# Let's use nncc project-wide build options
target_link_libraries(enco_core PRIVATE nncc_common)
{
public:
ANNBinder(coco::Block *block, std::unique_ptr<ann::Module> &&module)
- : _block{block}, _module{std::move(module)}
+ : _block{block}, _module{std::move(module)}
{
// DO NOTHING
}
#include "ANN/Context.h"
-#include <stdex/Memory.h>
+#include <memory>
ANNBinder *ANNContext::create(coco::Block *blk)
{
- auto mod = stdex::make_unique<ann::Module>();
- auto obj = stdex::make_unique<ANNBinder>(blk, std::move(mod));
+ auto mod = std::make_unique<ann::Module>();
+ auto obj = std::make_unique<ANNBinder>(blk, std::move(mod));
auto ptr = obj.get();
_binders.emplace_back(std::move(obj));
protected:
std::unique_ptr<coco::Module> m;
};
-}
+} // namespace
TEST_F(ANNContextTest, constructor)
{
#include "ANN/IR/OperandInventory.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace ann
{
public:
Operation(const Code &code, std::initializer_list<OperandID> inputs,
std::initializer_list<OperandID> outputs)
- : _code{code}, _inputs{inputs}, _outputs{outputs}
+ : _code{code}, _inputs{inputs}, _outputs{outputs}
{
// DO NOTHING
}
#include "OperationInventory.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace ann
{
#include "WeightInventory.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace ann
{
{
public:
AsmCode(const std::string &filename, const std::string &varname)
- : _filename{filename}, _varname{varname}
+ : _filename{filename}, _varname{varname}
{
// DO NOTHING
}
#include "Transforms/Split.h"
#include "Transforms/GlobalDataGeneration.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <stdexcept>
#include <iostream>
#include <fstream>
-using stdex::make_unique;
+using std::make_unique;
using namespace enco;
namespace
ofs << CppCode{data_var, code(sess)} << std::endl;
}
-} // namespace enco
+} // namespace
#include <iostream>
public:
CodeIndex(const coco::BlockIndex &blk_ind, const coco::InstrIndex &ins_ind)
- : _blk_ind{blk_ind}, _ins_ind{ins_ind}
+ : _blk_ind{blk_ind}, _ins_ind{ins_ind}
{
}
#include <pp/EnclosedDocument.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <string>
public:
TransferLoop(uint32_t count, uint32_t src_step, uint32_t dst_step)
- : _count{count}, _step{src_step, dst_step}
+ : _count{count}, _step{src_step, dst_step}
{
// DO NOTHING
}
{
InstrPrinter prn{_mem};
- auto res = stdex::make_unique<pp::LinearDocument>();
+ auto res = std::make_unique<pp::LinearDocument>();
for (auto ins = blk->instr()->head(); ins; ins = ins->next())
{
#include <pp/LinearDocument.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <sstream>
-using stdex::make_unique;
+using std::make_unique;
using enco::concat;
#define S(content) #content
{
public:
ScalarOperandDecl(const std::string &model, const ann::DType &dtype)
- : _model{model}, _dtype{dtype}
+ : _model{model}, _dtype{dtype}
{
// DO NOTHING
}
public:
TensorOperandDecl(const std::string &model, const ann::DType &dtype,
const nncc::core::ADT::tensor::Shape &shape)
- : _model{model}, _dtype{dtype}, _shape{shape}
+ : _model{model}, _dtype{dtype}, _shape{shape}
{
// DO NOTHING
}
public:
WeightDecl(const std::string &model, const ann::OperandID &id, const std::string &base,
const std::string &size)
- : _model{model}, _id{id}, _base{base}, _size{size}
+ : _model{model}, _id{id}, _base{base}, _size{size}
{
// DO NOTHING
}
#include "Session.h"
-#include <stdex/Memory.h>
-
#include <map>
#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
}
SectionBuilder section(const std::string &tag) { return SectionBuilder{tag}; }
-}
+} // namespace
/**
* SECTION: Bag
#include "FeatureUnification.h"
#include "IRUtils.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <set>
#include <vector>
#include <cassert>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "Split.h"
#include "Dims.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include <coco/IR.h>
#include <nncc/core/ADT/kernel/NHWCLayout.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <map>
#include <stdexcept>
#include <functional>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
auto ofm = binder->addOperand<float>(_ofm);
binder->addOperation(
- ann::Operation::Code::DEPTHWISE_CONV_2D,
- {ifm, ker, bias, left, right, top, bottom, hstride, vstride, multiplier, fuse}, {ofm});
+ ann::Operation::Code::DEPTHWISE_CONV_2D,
+ {ifm, ker, bias, left, right, top, bottom, hstride, vstride, multiplier, fuse}, {ofm});
}
private:
void run(const SessionID &sess) const override { split_into_phases(code(sess)); }
};
-} // namespace enco;
+} // namespace enco
#endif // __SPLIT_H__
target_link_libraries(enco_caffe_frontend enco_intf_cmdline)
target_link_libraries(enco_caffe_frontend morph)
target_link_libraries(enco_caffe_frontend caffeproto)
-target_link_libraries(enco_caffe_frontend stdex)
nnas_find_package(GTest QUIET)
explicit GraphBuilderContext(coco::Module *module, coco::Data *data, coco::Block *block,
ShapeContext &shape_ctx, StoreContext &bag_ctx,
WeightContext &weight_ctx)
- : _module(module), _data(data), _block(block), _shape_ctx(shape_ctx), _bag_ctx(bag_ctx),
- _weight_ctx(weight_ctx)
+ : _module(module), _data(data), _block(block), _shape_ctx(shape_ctx), _bag_ctx(bag_ctx),
+ _weight_ctx(weight_ctx)
{
// DO NOTHING
}
#include <cmdline/View.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <fstream>
#include <cassert>
{
assert(cmdline.size() == 2);
- auto frontend = stdex::make_unique<Frontend>();
+ auto frontend = std::make_unique<Frontend>();
// Fill prototxt
{
#include "Layer/Scale.h"
#include "Layer/BatchNorm.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace caffeimport
{
auto ker_dst = data->f32()->access(ker_obj);
auto ker_src = kernel::OverlayFactory<float, kernel::NCHWLayout>::make(
- ker_obj->shape(), ker_blob->mutable_data()->begin());
+ ker_obj->shape(), ker_blob->mutable_data()->begin());
for (uint32_t n = 0; n < ker_obj->shape().count(); ++n)
{
target_link_libraries(enco_tflite_frontend enco_intf_cmdline)
target_link_libraries(enco_tflite_frontend flatbuffers)
target_link_libraries(enco_tflite_frontend enco_tflite_schema)
-target_link_libraries(enco_tflite_frontend stdex)
target_link_libraries(enco_tflite_frontend morph)
target_link_libraries(enco_tflite_frontend cwrap)
}
TflOpCodeContext::TflOpCodeContext(
- const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *opcodes)
+ const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *opcodes)
{
for (const tflite::OperatorCode *opcode : *opcodes)
{
explicit GraphBuilderContext(coco::Module *m, coco::Data *d, coco::Block *block,
TensorBags &tensor_bags, TensorContext &tensor_context,
TflBufferContext &buffer_context, const tflite::SubGraph *graph)
- : _m(m), _d(d), _block(block), _tensor_bags(tensor_bags), _tensor_context(tensor_context),
- _buffer_context(buffer_context), _graph(graph)
+ : _m(m), _d(d), _block(block), _tensor_bags(tensor_bags), _tensor_context(tensor_context),
+ _buffer_context(buffer_context), _graph(graph)
{
// DO NOTHING
}
#include <cmdline/View.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <fstream>
#include <cassert>
-using stdex::make_unique;
+using std::make_unique;
extern "C" std::unique_ptr<enco::Frontend> make_frontend(const cmdline::View &cmdline)
{
#include "Frontend.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
#include "Op/Div.h"
#include <schema_generated.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <map>
-using stdex::make_unique;
+using std::make_unique;
namespace tflimport
{
// add GraphBuilder for each tflite operation.
_builder_map[tflite::BuiltinOperator_CONV_2D] = make_unique<Conv2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] =
- make_unique<DepthwiseConv2DGraphBuilder>();
+ make_unique<DepthwiseConv2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<AvgPool2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique<MaxPool2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationGraphBuilder>();
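A small, hedged sketch of the registry pattern above: a map from operator code to an owning GraphBuilder pointer, looked up at import time. The enum values and builder classes here are illustrative, not the actual tflite schema types.

#include <iostream>
#include <map>
#include <memory>

// Illustrative operator codes and builder interface
enum class BuiltinOperator
{
  CONV_2D,
  DEPTHWISE_CONV_2D,
  AVERAGE_POOL_2D
};

struct GraphBuilder
{
  virtual ~GraphBuilder() = default;
  virtual void build() const = 0;
};

struct Conv2DGraphBuilder final : GraphBuilder
{
  void build() const override { std::cout << "build CONV_2D" << std::endl; }
};

struct AvgPool2DGraphBuilder final : GraphBuilder
{
  void build() const override { std::cout << "build AVERAGE_POOL_2D" << std::endl; }
};

int main()
{
  std::map<BuiltinOperator, std::unique_ptr<GraphBuilder>> builder_map;
  builder_map[BuiltinOperator::CONV_2D] = std::make_unique<Conv2DGraphBuilder>();
  builder_map[BuiltinOperator::AVERAGE_POOL_2D] = std::make_unique<AvgPool2DGraphBuilder>();

  // Dispatch by looking up the opcode; unknown ops simply have no builder registered
  auto it = builder_map.find(BuiltinOperator::CONV_2D);
  if (it != builder_map.end())
    it->second->build();

  return 0;
}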
coco_avgpool2d->stride()->horizontal(params->stride_w());
coco::Padding2D padding =
- pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
+ pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
coco_avgpool2d->pad()->top(padding.top());
coco_avgpool2d->pad()->bottom(padding.bottom());
// fused activation
coco::FeatureObject *act_output =
- build_activation(conv_params->fused_activation_function(), blk, last_obj);
+ build_activation(conv_params->fused_activation_function(), blk, last_obj);
// Create Copy Instr of last_obj to Output Object
auto copy_ins = instr_builder(m).copy(ofm_obj, act_output);
auto wc = new_shape.width() * new_shape.depth();
ker_spn[n * hwc + h * wc + w * new_shape.depth() + c] =
- buffer.ptr[tfl_n * hw * new_shape.count() + /* new_shape.count() is old c */
- h * new_shape.width() * new_shape.count() + w * new_shape.count() + tfl_c];
+ buffer.ptr[tfl_n * hw * new_shape.count() + /* new_shape.count() is old c */
+ h * new_shape.width() * new_shape.count() + w * new_shape.count() + tfl_c];
}
}
}
// fused activation
coco::FeatureObject *act_output =
- build_activation(dconv_params->fused_activation_function(), blk, last_obj);
+ build_activation(dconv_params->fused_activation_function(), blk, last_obj);
// Create Copy Instr of last_obj to Output Object
auto copy_ins = instr_builder(m).copy(ofm_obj, act_output);
coco_maxpool2d->stride()->horizontal(params->stride_w());
coco::Padding2D padding =
- pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
+ pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
coco_maxpool2d->pad()->top(padding.top());
coco_maxpool2d->pad()->bottom(padding.bottom());
add_library(${PREFIX}-frontend SHARED enco.test.cpp)
target_link_libraries(${PREFIX}-frontend enco_intf_cmdline)
target_link_libraries(${PREFIX}-frontend enco_intf_frontend)
-target_link_libraries(${PREFIX}-frontend stdex)
# NOTE BYPRODUCTS are not specified in order to enforce source code generation
add_custom_command(OUTPUT ${GENERATED_CPP} ${GENERATED_ASM} ${GENERATED_BIN}
#include <nncc/core/ADT/tensor/LexicalLayout.h>
-#include <stdex/Memory.h>
+#include <memory>
using namespace nncc::core::ADT;
extern "C" std::unique_ptr<enco::Frontend> make_frontend(const cmdline::View &cmdline)
{
- return stdex::make_unique<Frontend>();
+ return std::make_unique<Frontend>();
}
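The extern "C" factory above is how a frontend is exposed from a shared object. As a hedged sketch (assuming POSIX dlopen/dlsym, a hypothetical library path, and stand-in Frontend/View types rather than the real enco and cmdline headers), a host program would resolve and call it roughly like this:

#include <dlfcn.h>

#include <iostream>
#include <memory>

// Hypothetical stand-ins for enco::Frontend and cmdline::View
struct Frontend
{
  virtual ~Frontend() = default;
};
struct View
{
};

int main()
{
  // NOTE the library path is illustrative only
  void *handle = dlopen("./libenco-frontend.so", RTLD_NOW);
  if (handle == nullptr)
  {
    std::cerr << dlerror() << std::endl;
    return 1;
  }

  using FactoryFn = std::unique_ptr<Frontend> (*)(const View &);
  auto factory = reinterpret_cast<FactoryFn>(dlsym(handle, "make_frontend"));
  if (factory == nullptr)
  {
    std::cerr << dlerror() << std::endl;
    return 1;
  }

  View cmdline;
  std::unique_ptr<Frontend> frontend = factory(cmdline);
  // ... use frontend ...

  frontend.reset();
  dlclose(handle);
  return 0;
}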
#include <nncc/core/ADT/tensor/LexicalLayout.h>
#include <nncc/core/ADT/tensor/Overlay.h>
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
using namespace nncc::core::ADT;
namespace
target_link_libraries(${BINDER_TARGET} nnkit_intf_backend)
target_link_libraries(${BINDER_TARGET} ann_api)
target_link_libraries(${BINDER_TARGET} ann_ref_static)
- target_link_libraries(${BINDER_TARGET} stdex)
set_target_properties(${BINDER_TARGET} PROPERTIES OUTPUT_NAME ${PREFIX})
list(APPEND TESTS ${PREFIX})
target_link_libraries(${BINDER_TARGET} nnkit_intf_backend)
target_link_libraries(${BINDER_TARGET} ann_api)
target_link_libraries(${BINDER_TARGET} ann_ref_static)
- target_link_libraries(${BINDER_TARGET} stdex)
set_target_properties(${BINDER_TARGET} PROPERTIES OUTPUT_NAME ${PREFIX})
list(APPEND TESTS ${PREFIX})
target_link_libraries(encodump enco_intf_frontend)
target_link_libraries(encodump enco_core)
target_link_libraries(encodump safemain)
-target_link_libraries(encodump stdex)
target_link_libraries(encodump dl)
} // namespace
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <iostream>
std::map<std::string, std::function<void(const std::string &arg)>> argparse;
argparse["--frontend"] = [&](const std::string &path) {
- frontend_zone = stdex::make_unique<FrontendZone>(path);
+ frontend_zone = std::make_unique<FrontendZone>(path);
};
argparse["--frontend-arg"] = [&](const std::string &arg) { frontend_zone->append(arg); };
target_link_libraries(exo PUBLIC exo_tflite_fbs)
target_link_libraries(exo PUBLIC exo_circle_fbs)
target_link_libraries(exo PUBLIC loco)
-target_link_libraries(exo PRIVATE stdex)
target_link_libraries(exo PRIVATE pepper_str)
target_link_libraries(exo PRIVATE pepper_strcast)
target_link_libraries(exo PRIVATE locoex_customop)
GTest_AddTest(exo_test ${TESTS})
target_include_directories(exo_test PRIVATE src)
-target_link_libraries(exo_test stdex)
target_link_libraries(exo_test pepper_str)
target_link_libraries(exo_test exo)
target_link_libraries(exo_test hermes_std)
-require("stdex")
require("loco")
require("locoex-customop")
require("logo")
#include "CircleExporterImpl.h"
-#include <stdex/Memory.h>
-
#include <oops/InternalExn.h>
+#include <memory>
#include <fstream>
namespace exo
{
-CircleExporter::CircleExporter(loco::Graph *graph) : _impl(stdex::make_unique<Impl>(graph))
+CircleExporter::CircleExporter(loco::Graph *graph) : _impl(std::make_unique<Impl>(graph))
{
// NOTHING TO DO
}
INTERNAL_EXN("Cannot find code for customop even though opcode is BuiltinOperator_CUSTOM");
operator_codes_vec[idx] =
- CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
}
}
return builder.CreateVector(operator_codes_vec);
// encode operator codes
auto operator_codes =
- encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
+ encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
// Subgraphs
Offset<SubGraph> subgraph = exportSubgraph(gd);
//
// NOTE input and output 'feature' maps are in NHWC shape
bool same_padding_criterion_1 =
- (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
- (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
// For SAME padding, rear padding is equal to front padding or larger by at most 1
bool same_padding_criterion_2 =
- (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
- (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
if (same_padding_criterion_1 && same_padding_criterion_2)
return circle::Padding_SAME;
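To make the two criteria above concrete, here is a small standalone check using the same integer arithmetic (the shapes and pad values are illustrative only): for an IFM with H = W = 5 and a 2x2 stride, SAME padding expects OFM H = (5 - 1) / 2 + 1 = 3 and W = 3, and rear padding may exceed front padding by at most one.

#include <cassert>
#include <cstdint>

int main()
{
  // Illustrative NHWC dims = {N, H, W, C}; only H and W matter here
  const uint32_t ifm_h = 5, ifm_w = 5;
  const uint32_t stride_v = 2, stride_h = 2;

  // Criterion 1: OFM spatial dims equal ceil(IFM / stride), written with integer math
  const uint32_t ofm_h = (ifm_h - 1) / stride_v + 1; // 3
  const uint32_t ofm_w = (ifm_w - 1) / stride_h + 1; // 3
  assert(ofm_h == 3 && ofm_w == 3);

  // Criterion 2: rear padding is equal to front padding or larger by at most one
  const uint32_t pad_top = 0, pad_bottom = 1, pad_left = 0, pad_right = 1;
  const bool criterion_2 = (pad_top <= pad_bottom) && (pad_bottom <= pad_top + 1) &&
                           (pad_left <= pad_right) && (pad_right <= pad_left + 1);
  assert(criterion_2);

  return 0;
}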
gd._data_format = circle::DataFormat::DataFormat_CHANNELS_LAST;
}
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
void set_tensor_index(loco::Node *node, const TFLTensorIndex &tensor_id)
{
assert(node->annot<TFLTensorIndexAnnotation>() == nullptr);
- node->annot(stdex::make_unique<TFLTensorIndexAnnotation>(tensor_id));
+ node->annot(std::make_unique<TFLTensorIndexAnnotation>(tensor_id));
}
TFLTensorIndex get_tensor_index(loco::Node *node)
void visit(loco::ReLU *) final;
void visit(loco::ReLU6 *) final;
void visit(loco::Tanh *) final;
- void visit(loco::Push *) final { /* DO NOTHING */}
- void visit(loco::Pull *) final { /* DO NOTHING */}
+ void visit(loco::Push *) final
+ { /* DO NOTHING */
+ }
+ void visit(loco::Pull *) final
+ { /* DO NOTHING */
+ }
void visit(loco::FeatureEncode *) final;
void visit(loco::FeatureDecode *) final;
void visit(loco::FilterEncode *) final;
void visit(loco::DepthwiseFilterEncode *) final;
- void visit(loco::ConstGen *) final { /* skip, everything is done in exportOpDefinedTensors */}
+ void visit(loco::ConstGen *) final
+ { /* skip, everything is done in exportOpDefinedTensors */
+ }
void visit(loco::MaxPool2D *) final;
void visit(loco::AvgPool2D *) final;
void visit(loco::Conv2D *) final;
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
auto options =
- CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+ CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
// Make FULLY_CONNECTED operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
auto options = CreateTransposeOptions(builder);
auto op_offset =
- CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
gd._operators.push_back(op_offset);
}
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(node->padding());
auto options =
- CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
+ CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
// Make TRANSPOSE_CONV operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
void OperationExporter::export_pool_2d(TFLPool2D *node, circle::BuiltinOperator builtin_op)
{
EXO_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+ builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
"should be maxpool or avgpool");
EXO_ASSERT(node->padding() != locoex::Padding::UNDEFINED, "Padding is not set");
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
circle::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
circle::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
std::vector<float> bias_vec_data(bias_vec_size); // initialized as zero vector
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
// Make input, output and options for operator
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreateConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreateConv2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical());
// Make CONV_2D operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
}
auto outshape_vec_offset = builder.CreateVector(
- reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
+ reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
auto outshape_buffer_offset = CreateBuffer(builder, outshape_vec_offset);
size_t raw_bias_vec_size = bias_vec_size * sizeof(int32_t);
std::vector<float> bias_vec_data(bias_vec_size);
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm()), get_tensor_index(node->ker()),
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
int32_t ifm_channel_size = ShapeInference::get(node->ifm())._dims[3];
// multiplier = bias_vec_size(output_size)/ifm_channel_size
auto options =
- CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), bias_vec_size / ifm_channel_size);
+ CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical(), bias_vec_size / ifm_channel_size);
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
circle::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
size_t raw_axes_vec_size = axes_vec_size * sizeof(int32_t);
auto axes_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
auto axes_buffer_offset = CreateBuffer(builder, axes_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(axes_tensor_id));
auto axes_tensor_offset =
- CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
+ CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
gd._tensors.push_back(axes_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), axes_tensor_id};
constexpr size_t raw_perm_vec_size = perm_vec_size * sizeof(int32_t);
auto perm_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
auto perm_buffer_offset = CreateBuffer(builder, perm_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(perm_tensor_id));
auto perm_tensor_offset =
- CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
+ CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
gd._tensors.push_back(perm_tensor_offset);
// Create permutation node
constexpr auto options_type = circle::BuiltinOptions::BuiltinOptions_TransposeOptions;
auto transpose_offset =
- CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
gd._operators.push_back(transpose_offset);
}
// but also by input.
auto input_shape_shape_vec_offset =
- builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
+ builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
size_t input_shape_vec_size = new_shape_vec.size() * sizeof(int32_t);
auto input_shape_input_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
auto input_shape_buffer_offset = CreateBuffer(builder, input_shape_input_vec_offset);
const auto input_shape_buffer_id = static_cast<uint32_t>(gd._buffers.size());
auto input_shape_tensor_id = static_cast<int32_t>(gd._tensors.size());
auto name_offset = builder.CreateString("t_" + std::to_string(input_shape_tensor_id));
auto input_shape_tensor_offset = CreateTensor(
- builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
+ builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
gd._tensors.push_back(input_shape_tensor_offset);
uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_RESHAPE);
auto padding_shape_vec_ptr = builder.CreateVector(std::vector<int32_t>{padding_vec_size, 2});
// create tensor
auto padding_tensor_ptr =
- CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
+ CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
// get tensor id
const auto padding_tensor_id = static_cast<int32_t>(gd._tensors.size());
#include <oops/InternalExn.h>
-#include <stdex/Memory.h>
-
#include <stdexcept>
#include <type_traits>
reshape->tensor(filter_dec);
int32_t new_shape[4] = {
- 1, static_cast<int32_t>(filter_shape.height().value()),
- static_cast<int32_t>(filter_shape.width().value()),
- static_cast<int32_t>(filter_shape.depth().value() * filter_shape.multiplier().value())};
+ 1, static_cast<int32_t>(filter_shape.height().value()),
+ static_cast<int32_t>(filter_shape.width().value()),
+ static_cast<int32_t>(filter_shape.depth().value() * filter_shape.multiplier().value())};
locoex::set_new_shape(reshape, new_shape, 4);
tfl_dw_conv2d->filter(reshape);
#include <logo/RemoveForwardNodePass.h>
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace exo
{
logo::Phase phase;
{
// prepare type and shape before conversion
- phase.emplace_back(stdex::make_unique<TypeInferencePass>());
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<TypeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
// Add converters for canonical nodes. Note: Not all loco canonical nodes are listed.
- phase.emplace_back(stdex::make_unique<AvgPool2DConverter>());
- phase.emplace_back(stdex::make_unique<ConstGenConverter>());
- phase.emplace_back(stdex::make_unique<Conv2DConverter>());
- phase.emplace_back(stdex::make_unique<DepthwiseConv2DConverter>());
+ phase.emplace_back(std::make_unique<AvgPool2DConverter>());
+ phase.emplace_back(std::make_unique<ConstGenConverter>());
+ phase.emplace_back(std::make_unique<Conv2DConverter>());
+ phase.emplace_back(std::make_unique<DepthwiseConv2DConverter>());
// TODO loco::DepthwiseFilterEncode
- phase.emplace_back(stdex::make_unique<EltwiseAddConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseDivConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseMaxConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseMulConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseSqrtConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseSubConverter>());
- phase.emplace_back(stdex::make_unique<FeatureBiasAddConverter>());
+ phase.emplace_back(std::make_unique<EltwiseAddConverter>());
+ phase.emplace_back(std::make_unique<EltwiseDivConverter>());
+ phase.emplace_back(std::make_unique<EltwiseMaxConverter>());
+ phase.emplace_back(std::make_unique<EltwiseMulConverter>());
+ phase.emplace_back(std::make_unique<EltwiseSqrtConverter>());
+ phase.emplace_back(std::make_unique<EltwiseSubConverter>());
+ phase.emplace_back(std::make_unique<FeatureBiasAddConverter>());
// TODO loco::FixedReshape
- phase.emplace_back(stdex::make_unique<MatMulConverter>());
- phase.emplace_back(stdex::make_unique<MaxPool2DConverter>());
- phase.emplace_back(stdex::make_unique<ReluConverter>());
- phase.emplace_back(stdex::make_unique<Relu6Converter>());
+ phase.emplace_back(std::make_unique<MatMulConverter>());
+ phase.emplace_back(std::make_unique<MaxPool2DConverter>());
+ phase.emplace_back(std::make_unique<ReluConverter>());
+ phase.emplace_back(std::make_unique<Relu6Converter>());
// TODO loco::Tanh
- phase.emplace_back(stdex::make_unique<TensorConcatConverter>());
+ phase.emplace_back(std::make_unique<TensorConcatConverter>());
// TODO loco::TensorBiasAdd
- phase.emplace_back(stdex::make_unique<TensorBroadcastConverter>());
- phase.emplace_back(stdex::make_unique<TensorReduceConverter>());
+ phase.emplace_back(std::make_unique<TensorBroadcastConverter>());
+ phase.emplace_back(std::make_unique<TensorReduceConverter>());
// TODO loco::TensorSoftmax
- phase.emplace_back(stdex::make_unique<TensorTransposeConverter>());
- phase.emplace_back(stdex::make_unique<TransposedConv2DConverter>());
+ phase.emplace_back(std::make_unique<TensorTransposeConverter>());
+ phase.emplace_back(std::make_unique<TransposedConv2DConverter>());
// Add optimization below
- phase.emplace_back(stdex::make_unique<logo::SimplifyDomainConversionPass>());
- phase.emplace_back(stdex::make_unique<logo::RemoveForwardNodePass>());
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::SimplifyDomainConversionPass>());
+ phase.emplace_back(std::make_unique<logo::RemoveForwardNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{graph};
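As a minimal sketch of the pass-pipeline pattern above (a vector of owning pass pointers populated with std::make_unique and driven by a runner until nothing changes), using hypothetical Pass/Phase types rather than the real logo API:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Hypothetical pass interface; the real project uses logo::Pass
struct Pass
{
  virtual ~Pass() = default;
  virtual std::string name() const = 0;
  virtual bool run() = 0; // returns true if the graph changed
};

struct ShapeInferencePass final : Pass
{
  std::string name() const override { return "ShapeInference"; }
  bool run() override { return false; }
};

struct RemoveDeadNodePass final : Pass
{
  std::string name() const override { return "RemoveDeadNode"; }
  bool run() override { return false; }
};

using Phase = std::vector<std::unique_ptr<Pass>>;

// Restart-style strategy: rerun the whole phase until no pass reports a change
void run_until_fixed_point(Phase &phase)
{
  bool changed = true;
  while (changed)
  {
    changed = false;
    for (auto &pass : phase)
    {
      std::cout << "Running " << pass->name() << std::endl;
      changed = pass->run() || changed;
    }
  }
}

int main()
{
  Phase phase;
  phase.emplace_back(std::make_unique<ShapeInferencePass>());
  phase.emplace_back(std::make_unique<RemoveDeadNodePass>());
  run_until_fixed_point(phase);
  return 0;
}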
* @brief INSTANCE_NORM in circle
*/
class CircleInstanceNorm final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
- public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
/// @note Currently only support FLOAT32 as input node
public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
{
public:
- TFLAveragePool2D() : _padding(Padding::UNDEFINED) { /* empty */}
+ TFLAveragePool2D() : _padding(Padding::UNDEFINED)
+ { /* empty */
+ }
public:
loco::Node *value(void) const { return at(0)->node(); }
* @brief DEPTHWISE_CONV_2D in TensorFlow Lite
*/
class TFLDepthwiseConv2D final
- : public FixedArityNode<3, TFLNodeImpl<TFLOpcode::DEPTHWISE_CONV_2D>>,
- public TFLNodeMixin<TFLNodeTrait::FusedActFunc>,
- public TFLNodeMixin<TFLNodeTrait::Bias>
+ : public FixedArityNode<3, TFLNodeImpl<TFLOpcode::DEPTHWISE_CONV_2D>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>,
+ public TFLNodeMixin<TFLNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
{
public:
- TFLMaxPool2D() : _padding(Padding::UNDEFINED) { /* empty */}
+ TFLMaxPool2D() : _padding(Padding::UNDEFINED)
+ { /* empty */
+ }
public:
loco::Node *value(void) const { return at(0)->node(); }
};
class TFLSquaredDifference final
- : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::SQUARED_DIFFERENCE>>
+ : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::SQUARED_DIFFERENCE>>
{
public:
TFLSquaredDifference() = default;
#include <loco/Service/CanonicalShapeInferenceRule.h>
#include <loco/Service/MultiDialectShapeInferenceRule.h>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
TEST(TFLShapeInferenceRuleTest, minimal_with_TFLRelu)
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
loco::apply(&rules).to(graph.g.get());
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
loco::apply(&rules).to(graph.g.get());
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
loco::apply(&rules).to(graph.g.get());
#include <loco/IR/CanonicalDialect.h>
#include <loco/Service/TypeInference.h>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
TEST(TFLTypeInferenceRuleTest, minimal_with_TFLRelu)
#include <locop/FormattedGraph.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace exo
{
public:
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tlb) const final
{
- return stdex::make_unique<NodeSummaryBuilder>(tlb);
+ return std::make_unique<NodeSummaryBuilder>(tlb);
}
};
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace exo
{
logo::Phase phase;
{
// prepare type and shape before optimization
- phase.emplace_back(stdex::make_unique<TypeInferencePass>());
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<TypeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
- phase.emplace_back(stdex::make_unique<FoldReshapeOfConstPass>());
- phase.emplace_back(stdex::make_unique<FoldTransposeOfConstPass>());
+ phase.emplace_back(std::make_unique<FoldReshapeOfConstPass>());
+ phase.emplace_back(std::make_unique<FoldTransposeOfConstPass>());
if (get<Knob::UseFuseBiasAddPass>())
{
- phase.emplace_back(stdex::make_unique<FuseBiasAddPass>());
+ phase.emplace_back(std::make_unique<FuseBiasAddPass>());
}
if (get<Knob::UseFuseInstanceNormPass>())
{
- phase.emplace_back(stdex::make_unique<FuseInstanceNormPass>());
+ phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
}
if (get<Knob::UseFuseReluPass>())
{
- phase.emplace_back(stdex::make_unique<FuseReluPass>());
+ phase.emplace_back(std::make_unique<FuseReluPass>());
}
- phase.emplace_back(stdex::make_unique<FuseRsqrtPass>());
+ phase.emplace_back(std::make_unique<FuseRsqrtPass>());
if (get<Knob::UseFuseSquaredDifferencePass>())
{
- phase.emplace_back(stdex::make_unique<FuseSquaredDifferencePass>());
+ phase.emplace_back(std::make_unique<FuseSquaredDifferencePass>());
}
- phase.emplace_back(stdex::make_unique<MergeConcatNodesPass>());
+ phase.emplace_back(std::make_unique<MergeConcatNodesPass>());
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
#include "Check.h"
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
EXO_ASSERT(input_for_encode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
encoder->perm(perm<T>());
EXO_ASSERT(input_for_decode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
decoder->perm(perm<T>());
EXO_ASSERT(input_for_encode != nullptr, "filter should not be nullptr");
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
encoder->perm(perm<T>());
EXO_ASSERT(input_for_decode != nullptr, "filter should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
decoder->perm(perm<T>());
EXO_ASSERT(input_for_decode != nullptr, "filter should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::DepthwiseFilter>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::DepthwiseFilter>>();
decoder->perm(perm<T>());
EXO_ASSERT(input_for_encode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Matrix>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Matrix>>();
encoder->perm(perm<T>());
EXO_ASSERT(input_for_decode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Matrix>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Matrix>>();
decoder->perm(perm<T>());
/// @brief Create a loco::MatrixDecode of given layout
template <MatrixLayout T> loco::MatrixDecode *make_matrix_decode(loco::Node *input_for_decode);
-} // exo
+} // namespace exo
//
// DomainConverter
#include "Log.h"
#include <hermes/ConsoleReporter.h>
-#include <stdex/Memory.h>
#include <cstdlib>
#include <iostream>
FormattedGraph fmt(loco::Graph *g)
{
- auto node_summary_builder = stdex::make_unique<NodeSummaryBuilderFactory>();
+ auto node_summary_builder = std::make_unique<NodeSummaryBuilderFactory>();
return std::move(locop::fmt<locop::LinearV1>(g).with(std::move(node_summary_builder)));
}
#include "Log.h" // To use LoggerConfig
#include <hermes/ConsoleReporter.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace exo
{
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<LoggerConfig>());
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<LoggerConfig>());
}
return ctx;
}
-} // namespac exo
+} // namespace exo
index_orig.at(perm->at<S32>(axis)) = index_new.at(axis);
const_new->at<FLOAT32>(l.offset(shape_new, index_new)) =
- const_orig->at<FLOAT32>(l.offset(shape_orig, index_orig));
+ const_orig->at<FLOAT32>(l.offset(shape_orig, index_orig));
}
// replace
Fuser(LatterT *latter)
{
static_assert(std::is_same<LatterT, locoex::TFLAdd>::value ||
- std::is_same<LatterT, locoex::TFLSub>::value,
+ std::is_same<LatterT, locoex::TFLSub>::value,
"wrong template type");
_latter = latter;
for (uint32_t x = 0; x < bias->dim(0).value(); x++)
new_bias->at<loco::DataType::FLOAT32>(x) = calc<LatterT>(
- bias->at<loco::DataType::FLOAT32>(x), _const_node->at<loco::DataType::FLOAT32>(x));
+ bias->at<loco::DataType::FLOAT32>(x), _const_node->at<loco::DataType::FLOAT32>(x));
}
return new_bias;
void setCandidate(FormerT *former, LatterT *latter, locoex::TFLConst *const_node)
{
static_assert(std::is_same<LatterT, locoex::TFLAdd>::value ||
- std::is_same<LatterT, locoex::TFLSub>::value,
+ std::is_same<LatterT, locoex::TFLSub>::value,
"wrong template type");
if (!check_act_func(former))
return;
auto depth =
- loco::shape_get(as_loco_node(former)).template as<loco::TensorShape>().dim(3).value();
+ loco::shape_get(as_loco_node(former)).template as<loco::TensorShape>().dim(3).value();
auto const_shape = loco::shape_get(const_node).template as<loco::TensorShape>();
if (const_shape.rank() == 1 and const_shape.dim(0) == depth)
CHECK_OR_FALSE(add_as_variance);
CHECK_OR_FALSE(
- fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
// TODO Support regarding broadcast
locoex::TFLMul *mul_gamma_should_be = nullptr;
locoex::TFLMean *mean_of_ifm_should_be = nullptr;
CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
- .with_commutative_args_of(mul_as_scaled_mean));
+ .with_commutative_args_of(mul_as_scaled_mean));
CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
#undef CHECK_OR_FALSE
{
static_assert((std::is_same<FusedTFLType, locoex::TFLRelu>::value &&
FusedActFunc == locoex::FusedActFunc::RELU) ||
- (std::is_same<FusedTFLType, locoex::TFLRelu6>::value &&
- FusedActFunc == locoex::FusedActFunc::RELU6),
+ (std::is_same<FusedTFLType, locoex::TFLRelu6>::value &&
+ FusedActFunc == locoex::FusedActFunc::RELU6),
"wrong template type");
exo::test::TestGraph g;
case locoex::FusedActFunc::RELU6:
return true;
- // case locoex::FusedActFunc::TANH:
- // return false;
+ // case locoex::FusedActFunc::TANH:
+ // return false;
default:
INTERNAL_EXN_V("Unknown FusedActFunc", oops::to_uint32(node1->fusedActivationFunction()));
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule)
- .bind(locoex::CircleDialect::get(), &circle_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule)
+ .bind(locoex::CircleDialect::get(), &circle_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
return loco::apply(&rules).to(g);
}
loco::MultiDialectTypeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule)
- .bind(locoex::CircleDialect::get(), &circle_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule)
+ .bind(locoex::CircleDialect::get(), &circle_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
return loco::apply(&rules).to(g);
}
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
#include "TFLExporterImpl.h"
-#include <stdex/Memory.h>
-
#include <oops/InternalExn.h>
+#include <memory>
#include <fstream>
namespace exo
{
-TFLExporter::TFLExporter(loco::Graph *graph) : _impl(stdex::make_unique<Impl>(graph))
+TFLExporter::TFLExporter(loco::Graph *graph) : _impl(std::make_unique<Impl>(graph))
{
// NOTHING TO DO
}
INTERNAL_EXN("Cannot find code for custom op");
operator_codes_vec[idx] =
- CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
}
}
return builder.CreateVector(operator_codes_vec);
// encode operator codes
auto operator_codes =
- encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
+ encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
// Subgraphs
Offset<SubGraph> subgraph = exportSubgraph(gd);
#include "Knob.h"
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
{
loco::FeatureEncode *encode_layer = graph()->nodes()->create<loco::FeatureEncode>();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
(*encoder->perm())[loco::FeatureAxis::Count] = 0;
(*encoder->perm())[loco::FeatureAxis::Depth] = 1;
(*encoder->perm())[loco::FeatureAxis::Height] = 2;
{
loco::FeatureDecode *decode_layer = graph()->nodes()->create<loco::FeatureDecode>();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
(*decoder->perm())[loco::FeatureAxis::Count] = 0;
(*decoder->perm())[loco::FeatureAxis::Depth] = 1;
(*decoder->perm())[loco::FeatureAxis::Height] = 2;
auto bufs = (model->buffers());
auto *perm_buf =
- reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
+ reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
ASSERT_EQ(1, perm_buf[0]);
ASSERT_EQ(2, perm_buf[1]);
auto bufs = (model->buffers());
auto *perm_buf =
- reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
+ reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
ASSERT_EQ(3, perm_buf[0]);
ASSERT_EQ(0, perm_buf[1]);
ASSERT_EQ(1, perm_buf[2]);
//
// NOTE input and output 'feature' maps are in NHWC shape
bool same_padding_criterion_1 =
- (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
- (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
// For SAME padding, rear padding is equal to front padding or larger by at most 1
bool same_padding_criterion_2 =
- (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
- (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
if (same_padding_criterion_1 && same_padding_criterion_2)
return tflite::Padding_SAME;
}
}
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
void set_tensor_index(loco::Node *node, const TFLTensorIndex &tensor_id)
{
assert(node->annot<TFLTensorIndexAnnotation>() == nullptr);
- node->annot(stdex::make_unique<TFLTensorIndexAnnotation>(tensor_id));
+ node->annot(std::make_unique<TFLTensorIndexAnnotation>(tensor_id));
}
TFLTensorIndex get_tensor_index(loco::Node *node)
void visit(loco::ReLU *) final;
void visit(loco::ReLU6 *) final;
void visit(loco::Tanh *) final;
- void visit(loco::Push *) final { /* DO NOTHING */}
- void visit(loco::Pull *) final { /* DO NOTHING */}
+ void visit(loco::Push *) final
+ { /* DO NOTHING */
+ }
+ void visit(loco::Pull *) final
+ { /* DO NOTHING */
+ }
void visit(loco::FeatureEncode *) final;
void visit(loco::FeatureDecode *) final;
void visit(loco::FilterEncode *) final;
void visit(loco::DepthwiseFilterEncode *) final;
- void visit(loco::ConstGen *) final { /* skip, everything is done in exportOpDefinedTensors */}
+ void visit(loco::ConstGen *) final
+ { /* skip, everything is done in exportOpDefinedTensors */
+ }
void visit(loco::MaxPool2D *) final;
void visit(loco::AvgPool2D *) final;
void visit(loco::Conv2D *) final;
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
auto options =
- CreateFullyConnectedOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
+ CreateFullyConnectedOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
// Make FULLY_CONNECTED operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
auto options = CreateTransposeOptions(builder);
auto op_offset =
- CreateOperator(builder, op_idx, inputs, outputs,
- tflite::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
gd._operators.push_back(op_offset);
}
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(node->padding());
auto options =
- CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
+ CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
// Make TRANSPOSE_CONV operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
void OperationExporter::export_pool_2d(TFLPool2D *node, tflite::BuiltinOperator builtin_op)
{
EXO_ASSERT(builtin_op == tflite::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == tflite::BuiltinOperator_AVERAGE_POOL_2D,
+ builtin_op == tflite::BuiltinOperator_AVERAGE_POOL_2D,
"should be maxpool or avgpool");
EXO_ASSERT(node->padding() != locoex::Padding::UNDEFINED, "Padding is not set");
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
tflite::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
tflite::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
std::vector<float> bias_vec_data(bias_vec_size); // initialized as zero vector
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
// Make input, output and options for operator
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreateConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreateConv2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical());
// Make CONV_2D operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
}
auto outshape_vec_offset = builder.CreateVector(
- reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
+ reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
auto outshape_buffer_offset = CreateBuffer(builder, outshape_vec_offset);
size_t raw_bias_vec_size = bias_vec_size * sizeof(int32_t);
std::vector<float> bias_vec_data(bias_vec_size);
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm()), get_tensor_index(node->ker()),
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
int32_t ifm_channel_size = ShapeInference::get(node->ifm())._dims[3];
// multiplier = bias_vec_size(output_size)/ifm_channel_size
auto options =
- CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), bias_vec_size / ifm_channel_size);
+ CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical(), bias_vec_size / ifm_channel_size);
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
tflite::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
size_t raw_axes_vec_size = axes_vec_size * sizeof(int32_t);
auto axes_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
auto axes_buffer_offset = CreateBuffer(builder, axes_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(axes_tensor_id));
auto axes_tensor_offset =
- CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
+ CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
gd._tensors.push_back(axes_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), axes_tensor_id};
constexpr size_t raw_perm_vec_size = perm_vec_size * sizeof(int32_t);
auto perm_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
auto perm_buffer_offset = CreateBuffer(builder, perm_vec_offset);
auto name_offset = builder.CreateString("t_" + std::to_string(perm_tensor_id));
auto perm_tensor_offset =
- CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
+ CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
gd._tensors.push_back(perm_tensor_offset);
// Create permutation node
constexpr auto options_type = tflite::BuiltinOptions::BuiltinOptions_TransposeOptions;
auto transpose_offset =
- CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
gd._operators.push_back(transpose_offset);
}
// but also by input.
auto input_shape_shape_vec_offset =
- builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
+ builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
size_t input_shape_vec_size = new_shape_vec.size() * sizeof(int32_t);
auto input_shape_input_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
auto input_shape_buffer_offset = CreateBuffer(builder, input_shape_input_vec_offset);
const auto input_shape_buffer_id = static_cast<uint32_t>(gd._buffers.size());
auto input_shape_tensor_id = static_cast<int32_t>(gd._tensors.size());
auto name_offset = builder.CreateString("t_" + std::to_string(input_shape_tensor_id));
auto input_shape_tensor_offset = CreateTensor(
- builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
+ builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
gd._tensors.push_back(input_shape_tensor_offset);
uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_RESHAPE);
auto padding_shape_vec_ptr = builder.CreateVector(std::vector<int32_t>{padding_vec_size, 2});
// create tensor
auto padding_tensor_ptr =
- CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
+ CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
// get tensor id
const auto padding_tensor_id = static_cast<int32_t>(gd._tensors.size());
bool visit(loco::FeatureEncode *node) final
{
auto encoder =
- loco::must_cast<loco::PermutingEncoder<loco::Domain::Feature> *>(node->encoder());
+ loco::must_cast<loco::PermutingEncoder<loco::Domain::Feature> *>(node->encoder());
auto perm = encoder->perm();
return isNHWC(perm);
}
bool visit(loco::FeatureDecode *node) final
{
auto decoder =
- loco::must_cast<loco::PermutingDecoder<loco::Domain::Feature> *>(node->decoder());
+ loco::must_cast<loco::PermutingDecoder<loco::Domain::Feature> *>(node->decoder());
auto perm = decoder->perm();
return isNHWC(perm);
}
#include <oops/InternalExn.h>
-#include <stdex/Memory.h>
-
#include <stdexcept>
#include <type_traits>
#include "Pass/TypeInferencePass.h"
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <gtest/gtest.h>
-using stdex::make_unique;
-
namespace
{
#include <loco.h>
-#include <stdex/Memory.h>
-
#include <cassert>
namespace exo
{
filterEncode = exo::make_filter_encode<exo::FilterLayout::HWIO>(pull); // from Tensorflow
filterDecode =
- exo::make_filter_decode<exo::FilterLayout::OHWI>(filterEncode); // to Tensorflow Lite
+ exo::make_filter_decode<exo::FilterLayout::OHWI>(filterEncode); // to Tensorflow Lite
complete(filterDecode);
}
};
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
TypeShapeReadyPhase()
{
// Type and Shape inference is prerequisite for run other test
- _phase.emplace_back(stdex::make_unique<::exo::TypeInferencePass>());
- _phase.emplace_back(stdex::make_unique<::exo::ShapeInferencePass>());
+ _phase.emplace_back(std::make_unique<::exo::TypeInferencePass>());
+ _phase.emplace_back(std::make_unique<::exo::ShapeInferencePass>());
}
- template <typename PassT> void add_pass() { _phase.emplace_back(stdex::make_unique<PassT>()); }
+ template <typename PassT> void add_pass() { _phase.emplace_back(std::make_unique<PassT>()); }
void run(loco::Graph *g)
{
add_library(foder INTERFACE)
target_include_directories(foder INTERFACE include)
+target_link_libraries(foder INTERFACE nncc_coverage)
* limitations under the License.
*/
+#ifndef __FODER_FILE_LOADER_H__
+#define __FODER_FILE_LOADER_H__
+
#include <fstream>
#include <vector>
};
} // namespace foder
+
+#endif // __FODER_FILE_LOADER_H__
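A hedged sketch of what an include-guarded whole-file loader like this typically looks like once assembled; the class name, load() body, and member names below are assumptions for illustration, not the verbatim foder::FileLoader.

#ifndef __EXAMPLE_FILE_LOADER_H__
#define __EXAMPLE_FILE_LOADER_H__

#include <cstddef>
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>

namespace example
{

// Reads a whole file into memory; names and behavior are illustrative
class FileLoader
{
public:
  explicit FileLoader(const std::string &path) : _path{path} {}

  std::vector<char> load(void) const
  {
    std::ifstream file(_path, std::ios::binary | std::ios::ate);
    if (!file.good())
      throw std::runtime_error("Failed to open " + _path);

    const std::streamoff size = file.tellg();
    std::vector<char> data(static_cast<std::size_t>(size));
    file.seekg(0, std::ios::beg);
    file.read(data.data(), size);
    return data;
  }

private:
  std::string _path;
};

} // namespace example

#endif // __EXAMPLE_FILE_LOADER_H__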
set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(hermes_std PUBLIC include)
target_link_libraries(hermes_std PUBLIC hermes)
-target_link_libraries(hermes_std PRIVATE stdex)
target_link_libraries(hermes_std PRIVATE pepper_strcast)
# Let's apply nncc common compile options
#
nnas_find_package(GTest REQUIRED)
GTest_AddTest(hermes_std_test ${TESTS})
-target_link_libraries(hermes_std_test stdex)
target_link_libraries(hermes_std_test hermes_std)
#include "hermes/ConsoleReporter.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <sstream>
#include <gtest/gtest.h>
ss << "Hello" << std::endl;
- m.text(stdex::make_unique<hermes::MessageText>(ss));
+ m.text(std::make_unique<hermes::MessageText>(ss));
}
hermes::ConsoleReporter r;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/EnvConfig.h"
+
+#include <hermes/core/SourceSetting.h>
+
+#include <gtest/gtest.h>
+
+#include <stdlib.h>
+
+namespace
+{
+
+class Logger final : public hermes::Source
+{
+public:
+ Logger() = default;
+ ~Logger() = default;
+};
+
+std::string env_name("TEST_CONFIG");
+
+} // namespace
+
+TEST(EnvConfigTest, constructor)
+{
+ hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> ec(env_name);
+
+ SUCCEED();
+}
+
+TEST(EnvConfigTest, configure)
+{
+ Logger logger;
+ hermes::SourceSetting ss;
+ hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> ec(env_name);
+
+ ec.configure(&logger, ss);
+
+ SUCCEED();
+}
+
+TEST(EnvConfigTest, configure_enabled)
+{
+ setenv(env_name.c_str(), "1", 0);
+
+ Logger logger;
+ hermes::SourceSetting ss;
+ hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> ec(env_name);
+
+ ec.configure(&logger, ss);
+
+ SUCCEED();
+}
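For readers new to hermes, the tests above show the whole EnvConfig surface used here: hand it an environment-variable name and let it configure a logging Source. The sketch below restates the enabled path with the intent spelled out in comments; the interpretation of "1" as "enabled" is inferred from the configure_enabled test rather than from separate documentation, and the include forms mirror those used in this change. Illustrative only, not part of any file in this patch.

#include "hermes/EnvConfig.h"
#include "hermes/core/Source.h"

#include <hermes/core/SourceSetting.h>

#include <cstdlib>

namespace
{

// Any hermes::Source works; this mirrors the Logger defined in the test above.
struct DemoSource final : public hermes::Source
{
};

} // namespace

int main()
{
  // Assumption: under EnvFormat::BooleanNumber, "1" switches the source on.
  setenv("TEST_CONFIG", "1", /*overwrite=*/0);

  DemoSource source;
  hermes::SourceSetting setting;
  hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> config("TEST_CONFIG");

  // Apply whatever TEST_CONFIG says to this source's setting.
  config.configure(&source, setting);

  return 0;
}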
add_library(hermes STATIC ${SOURCES})
set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(hermes PUBLIC include)
-target_link_libraries(hermes PRIVATE stdex)
# Let's apply nncc common compile options
#
# NOTE This will enable strict compilation (warnings as error).
add_executable(hermes_test ${TESTS})
target_link_libraries(hermes_test gtest_main)
-target_link_libraries(hermes_test stdex)
target_link_libraries(hermes_test hermes)
add_test(hermes_test hermes_test)
#include "hermes/core/MessageBuffer.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace hermes
{
{
// NOTE The current implementation is unsafe as it may throw an exception.
// TODO Find a better safe implementation.
- auto msg = stdex::make_unique<Message>();
+ auto msg = std::make_unique<Message>();
- msg->text(stdex::make_unique<MessageText>(_ss));
+ msg->text(std::make_unique<MessageText>(_ss));
_bus->post(std::move(msg));
}
#include "hermes/core/Source.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace hermes
std::unique_ptr<MessageBuffer> Source::buffer(const Severity &) const
{
// TODO Pass Severity
- return stdex::make_unique<MessageBuffer>(_bus);
+ return std::make_unique<MessageBuffer>(_bus);
}
} // namespace hermes
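The hunks above and below repeat one mechanical migration: stdex::make_unique becomes std::make_unique, the <stdex/Memory.h> include becomes <memory>, and the stdex link dependency drops out of the CMake files. A self-contained sketch of the pattern (illustrative only, not part of any file in this change):

#include <memory>

struct Message
{
  int value = 0;
};

int main()
{
  // Before this change: auto msg = stdex::make_unique<Message>();
  // After: the C++14 standard library provides the same helper directly.
  auto msg = std::make_unique<Message>();
  msg->value = 42;
  return msg->value == 42 ? 0 : 1;
}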
target_include_directories(loco PUBLIC include)
# TODO Remove dependencies on angkor library
target_link_libraries(loco PUBLIC angkor)
-target_link_libraries(loco PRIVATE stdex)
# Let's apply nncc common compile options
#
# NOTE This will enable strict compilation (warnings as error).
nnas_find_package(GTest REQUIRED)
GTest_AddTest(loco_test ${TESTS})
-target_link_libraries(loco_test stdex)
target_link_libraries(loco_test loco)
using Type = int16_t;
};
+template <> struct DataTypeImpl<DataType::U16>
+{
+ // Use C++ uint16_t type for unsigned 16bit integer
+ using Type = uint16_t;
+};
+
template <> struct DataTypeImpl<DataType::S32>
{
// Use C++ int32_t type for 32bit integer
using Type = int64_t;
};
+template <> struct DataTypeImpl<DataType::U64>
+{
+ // Use C++ uint64_t type for unsigned 64bit integer
+ using Type = uint64_t;
+};
+
template <> struct DataTypeImpl<DataType::FLOAT32>
{
// Use C++ float type for IEEE 32-bit floating-point numbers
using Type = float;
};
+template <> struct DataTypeImpl<DataType::FLOAT64>
+{
+ // Use C++ double type for IEEE 64-bit floating-point numbers
+ using Type = double;
+};
+
// NOTE DataTypeImpl for BOOL is subject to change
template <> struct DataTypeImpl<DataType::BOOL>
{
return sizeof(DataTypeImpl<DataType::U8>::Type);
case DataType::S16:
return sizeof(DataTypeImpl<DataType::S16>::Type);
+ case DataType::U16:
+ return sizeof(DataTypeImpl<DataType::U16>::Type);
case DataType::S32:
return sizeof(DataTypeImpl<DataType::S32>::Type);
case DataType::U32:
return sizeof(DataTypeImpl<DataType::U32>::Type);
case DataType::S64:
return sizeof(DataTypeImpl<DataType::S64>::Type);
+ case DataType::U64:
+ return sizeof(DataTypeImpl<DataType::U64>::Type);
case DataType::FLOAT32:
return sizeof(DataTypeImpl<DataType::FLOAT32>::Type);
+ case DataType::FLOAT64:
+ return sizeof(DataTypeImpl<DataType::FLOAT64>::Type);
case DataType::BOOL:
return sizeof(DataTypeImpl<DataType::BOOL>::Type);
default:
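The new U16, U64 and FLOAT64 entries extend the DataTypeImpl trait that maps each loco::DataType to its C++ representation, and the switch above sizes values through that mapping. A minimal sketch of how the trait and the size query can be exercised; the DataTypeTraits.h include path is assumed from loco's layout, and loco::size is the helper used later in this change by the luci eval driver:

#include <loco/IR/DataTypeTraits.h> // assumed location of DataTypeImpl and loco::size

#include <cstdint>
#include <type_traits>

static_assert(std::is_same<loco::DataTypeImpl<loco::DataType::U16>::Type, uint16_t>::value,
              "U16 maps to uint16_t");
static_assert(std::is_same<loco::DataTypeImpl<loco::DataType::FLOAT64>::Type, double>::value,
              "FLOAT64 maps to double");

int main()
{
  // With the new case added above, FLOAT64 reports the size of a double.
  return loco::size(loco::DataType::FLOAT64) == sizeof(double) ? 0 : 1;
}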
* @brief Make a value visible to user
*/
class Push /* to user */ final
- : public CanonicalNodeDef<CanonicalOpcode::Push, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::Push, FixedArity<1>::Mixin>
{
public:
Push() = default;
* @brief Create a value from user data
*/
class Pull /* from user */ final
- : public CanonicalNodeDef<CanonicalOpcode::Pull, FixedArity<0>::Mixin,
- With<NodeTrait::TensorShape>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::Pull, FixedArity<0>::Mixin,
+ With<NodeTrait::TensorShape>::Mixin>
{
public:
Pull() = default;
* }
*/
class ConstGen final
- : public CanonicalNodeDef<CanonicalOpcode::ConstGen, FixedArity<0>::Mixin,
- With<NodeTrait::DataType>::Mixin, With<NodeTrait::TensorShape>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::ConstGen, FixedArity<0>::Mixin,
+ With<NodeTrait::DataType>::Mixin, With<NodeTrait::TensorShape>::Mixin>
{
public:
ConstGen() = default;
* @brief Create a feature map from a tensor
*/
class FeatureEncode final
- : public CanonicalNodeDef<CanonicalOpcode::FeatureEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureEncode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Create a tensor from a feature map
*/
class FeatureDecode final
- : public CanonicalNodeDef<CanonicalOpcode::FeatureDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureDecode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Create a filter from a tensor
*/
class FilterEncode final
- : public CanonicalNodeDef<CanonicalOpcode::FilterEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FilterEncode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Create a tensor from a filter
*/
class FilterDecode final
- : public CanonicalNodeDef<CanonicalOpcode::FilterDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FilterDecode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Create a depthwise filter from a tensor
*/
class DepthwiseFilterEncode final
- : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterEncode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Create a tensor from a depthwise filter
*/
class DepthwiseFilterDecode final
- : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterDecode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
*/
template <>
class Reshape<ReshapeType::Fixed> final
- : public CanonicalNodeDef<CanonicalOpcode::FixedReshape, FixedArity<1>::Mixin,
- With<NodeTrait::TensorShape>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FixedReshape, FixedArity<1>::Mixin,
+ With<NodeTrait::TensorShape>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* concatenated along the given axis.
*/
class TensorConcat final
- : public CanonicalNodeDef<CanonicalOpcode::TensorConcat, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorConcat, FixedArity<2>::Mixin>
{
public:
Node *lhs(void) const { return at(0)->node(); }
* @brief Depthwise 2D Convolution
*/
class DepthwiseConv2D final
- : public CanonicalNodeDef<CanonicalOpcode::DepthwiseConv2D, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseConv2D, FixedArity<2>::Mixin>
{
public:
Node *ifm(void) const { return at(0)->node(); }
* @note All the reduce functions always keep dimensions
*/
class TensorReduce final
- : public CanonicalNodeDef<CanonicalOpcode::TensorReduce, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorReduce, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* With this, output shape is uniquely determined by all inputs and attributes.
*/
class TransposedConv2D final
- : public CanonicalNodeDef<CanonicalOpcode::TransposedConv2D, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TransposedConv2D, FixedArity<2>::Mixin>
{
public:
Node *ifm(void) const { return at(0)->node(); }
template <Domain D> class Softmax;
/**
-* @brief Computes softmax activations for Tensor domain
-*/
+ * @brief Computes softmax activations for Tensor domain
+ */
template <>
class Softmax<Domain::Tensor> final
- : public CanonicalNodeDef<CanonicalOpcode::TensorSoftmax, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorSoftmax, FixedArity<1>::Mixin>
{
public:
Softmax() = default;
*/
template <>
class BiasAdd<Domain::Tensor> final
- : public CanonicalNodeDef<CanonicalOpcode::TensorBiasAdd, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorBiasAdd, FixedArity<2>::Mixin>
{
public:
BiasAdd() = default;
*/
template <>
class BiasAdd<Domain::Feature> final
- : public CanonicalNodeDef<CanonicalOpcode::FeatureBiasAdd, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureBiasAdd, FixedArity<2>::Mixin>
{
public:
BiasAdd() = default;
* [padding.front(0) + 1 + padding.back(0), padding.front(1) + 2 + padding.back(1)] = [4,9].
*/
class TensorConstantPad final
- : public CanonicalNodeDef<CanonicalOpcode::TensorConstantPad, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorConstantPad, FixedArity<2>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
* @brief Elementwise Sqrt of input
*/
class EltwiseSqrt final
- : public CanonicalNodeDef<CanonicalOpcode::EltwiseSqrt, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::EltwiseSqrt, FixedArity<1>::Mixin>
{
public:
EltwiseSqrt() = default;
* TODO Explain the operation semantics
*/
class TensorBroadcast final
- : public CanonicalNodeDef<CanonicalOpcode::TensorBroadcast, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorBroadcast, FixedArity<1>::Mixin>
{
public:
TensorBroadcast() = default;
* MatrixEncode currently requires a rank-2 Tensor as its input.
*/
class MatrixEncode final
- : public CanonicalNodeDef<CanonicalOpcode::MatrixEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::MatrixEncode, FixedArity<1>::Mixin>
{
public:
MatrixEncode() = default;
* MatrixDecode currently requires a Matrix as its input.
*/
class MatrixDecode final
- : public CanonicalNodeDef<CanonicalOpcode::MatrixDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::MatrixDecode, FixedArity<1>::Mixin>
{
public:
MatrixDecode() = default;
* Input and output belong to tensor domain.
*/
class TensorTranspose final
- : public CanonicalNodeDef<CanonicalOpcode::TensorTranspose, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorTranspose, FixedArity<1>::Mixin>
{
public:
TensorTranspose() = default;
public:
Padding2D(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
- : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
{
// DO NOTHING
}
--- /dev/null
+require("angkor")
#include "loco/ADT/AnnotatedItem.h"
#include <gtest/gtest.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
{
static std::unique_ptr<DerivedAnnotation<N>> make(void)
{
- return stdex::make_unique<DerivedAnnotation<N>>();
+ return std::make_unique<DerivedAnnotation<N>>();
}
};
#include "loco/IR/Graph.h"
#include "loco/IR/Nodes.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <stdexcept>
CanonicalDialect::CanonicalDialect()
{
- service<GraphOutputIndexQueryService>(stdex::make_unique<GraphOutputIndexQueryServiceImpl>());
+ service<GraphOutputIndexQueryService>(std::make_unique<GraphOutputIndexQueryServiceImpl>());
}
Dialect *CanonicalDialect::get(void)
#include "loco/IR/Dialect.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
struct MockDialect final : public loco::Dialect
{
- MockDialect() { service<S1>(stdex::make_unique<S1>()); }
+ MockDialect() { service<S1>(std::make_unique<S1>()); }
};
MockDialect dialect;
#include "loco/IR/Graph.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
std::unique_ptr<loco::TensorShape> make_tensor_shape(std::initializer_list<loco::Dimension> dims)
{
- auto tensor_shape = stdex::make_unique<loco::TensorShape>();
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
tensor_shape->rank(dims.size());
{
shape(make_tensor_shape(dims));
}
-GraphInput *Graph::InputContext::create(void)
-{
- return take(stdex::make_unique<GraphInput>(size()));
-}
+GraphInput *Graph::InputContext::create(void) { return take(std::make_unique<GraphInput>(size())); }
GraphOutput *Graph::OutputContext::create(void)
{
- return take(stdex::make_unique<GraphOutput>(size()));
+ return take(std::make_unique<GraphOutput>(size()));
}
std::set<loco::Node *> all_nodes(loco::Graph *g)
{
// temp node with multiple params for ctor. loco::CanonicalOpcode::ReLU is used for simplicity
class ParamCtorNode
- : public loco::CanonicalNodeDef<loco::CanonicalOpcode::ReLU, loco::FixedArity<0>::Mixin>
+ : public loco::CanonicalNodeDef<loco::CanonicalOpcode::ReLU, loco::FixedArity<0>::Mixin>
{
public:
ParamCtorNode(int i, float f)
#include "loco/IR/PermutingCodec.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <set>
#include <stdexcept>
std::unique_ptr<FeatureEncoder> PermutingEncoder<Domain::Feature>::clone(void) const
{
- return stdex::make_unique<PermutingEncoder<Domain::Feature>>(_perm);
+ return std::make_unique<PermutingEncoder<Domain::Feature>>(_perm);
}
bool PermutingEncoder<Domain::Feature>::valid(void) const { return ::valid(_perm); }
std::unique_ptr<FeatureDecoder> PermutingDecoder<Domain::Feature>::clone(void) const
{
- return stdex::make_unique<PermutingDecoder<Domain::Feature>>(_perm);
+ return std::make_unique<PermutingDecoder<Domain::Feature>>(_perm);
}
bool PermutingDecoder<Domain::Feature>::valid(void) const { return ::valid(_perm); }
#include <gtest/gtest.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <vector>
-using stdex::make_unique;
+using std::make_unique;
TEST(VerifierTest, valid_minimal)
{
for (uint32_t axis = 0; axis < out_shape.rank(); ++axis)
{
out_shape.dim(axis) =
- tensor_shape.dim(axis).value() + padding->front(axis) + padding->back(axis);
+ tensor_shape.dim(axis).value() + padding->front(axis) + padding->back(axis);
}
return loco::NodeShape{out_shape};
testcase.pull_node->shape({1, 8, 4, 3});
- testcase.encode_node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+ testcase.encode_node->encoder(std::make_unique<PermutingEncoder<Domain::Feature>>(perm));
testcase.avgpool2d_node->window()->vertical(2);
testcase.avgpool2d_node->window()->horizontal(2);
testcase.avgpool2d_node->stride()->vertical(2);
testcase.avgpool2d_node->stride()->horizontal(2);
- testcase.decode_node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+ testcase.decode_node->decoder(std::make_unique<PermutingDecoder<Domain::Feature>>(perm));
// Run Inference
loco::CanonicalShapeInferenceRule rule;
testcase.pull_node->shape({1, 8, 4, 3});
- testcase.encode_node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+ testcase.encode_node->encoder(std::make_unique<PermutingEncoder<Domain::Feature>>(perm));
testcase.maxpool2d_node->window()->vertical(2);
testcase.maxpool2d_node->window()->horizontal(2);
testcase.maxpool2d_node->stride()->vertical(2);
testcase.maxpool2d_node->stride()->horizontal(2);
- testcase.decode_node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+ testcase.decode_node->decoder(std::make_unique<PermutingDecoder<Domain::Feature>>(perm));
// Run Inference
loco::CanonicalShapeInferenceRule rule;
// loco-internal headers
#include "loco/IR/Graph.h"
-// repo-internal headers
-#include <stdex/Memory.h>
-
// C++ standard headers
+#include <memory>
#include <stack>
//
// "Layer" is in theory a subgraph builder.
template <typename Layer, typename... Args>
auto push(Args &&... args)
- -> decltype(static_cast<Layer *>(nullptr)->operator()(static_cast<Context *>(nullptr)))
+ -> decltype(static_cast<Layer *>(nullptr)->operator()(static_cast<Context *>(nullptr)))
{
Layer layer{std::forward<Args>(args)...};
return layer(ctx());
static inline std::unique_ptr<GraphBuilder> make_graph_builder(loco::Graph *g)
{
- return stdex::make_unique<GraphBuilder>(g);
+ return std::make_unique<GraphBuilder>(g);
}
// "InputLayer" creates both GraphInput and Pull node at once
ctx->stack()->push(pull_node);
- return stdex::make_unique<Return>(graph_input, pull_node);
+ return std::make_unique<Return>(graph_input, pull_node);
}
};
ctx->stack()->push(push_node);
- return stdex::make_unique<Return>(graph_output, push_node);
+ return std::make_unique<Return>(graph_output, push_node);
}
};
ctx->stack()->push(relu_node);
- return stdex::make_unique<Return>(relu_node);
+ return std::make_unique<Return>(relu_node);
}
};
ctx->stack()->push(const_node);
- return stdex::make_unique<Return>(const_node);
+ return std::make_unique<Return>(const_node);
}
};
Return *perm(const loco::Permutation<loco::Domain::Feature> &perm)
{
using namespace loco;
- _node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+ _node->encoder(std::make_unique<PermutingEncoder<Domain::Feature>>(perm));
return this;
}
ctx->stack()->push(encode_node);
- return stdex::make_unique<Return>(encode_node);
+ return std::make_unique<Return>(encode_node);
}
};
Return *perm(const loco::Permutation<loco::Domain::Feature> &perm)
{
using namespace loco;
- _node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+ _node->decoder(std::make_unique<PermutingDecoder<Domain::Feature>>(perm));
return this;
}
ctx->stack()->push(decode_node);
- return stdex::make_unique<Return>(decode_node);
+ return std::make_unique<Return>(decode_node);
}
};
public:
Return *perm(const loco::Permutation<loco::Domain::Filter> &perm)
{
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
encoder->perm(perm);
_node->encoder(std::move(encoder));
return this;
ctx->stack()->push(encode_node);
- return stdex::make_unique<Return>(encode_node);
+ return std::make_unique<Return>(encode_node);
}
};
Return *perm(const loco::Permutation<loco::Domain::DepthwiseFilter> &perm)
{
using namespace loco;
- _node->encoder(stdex::make_unique<PermutingEncoder<Domain::DepthwiseFilter>>(perm));
+ _node->encoder(std::make_unique<PermutingEncoder<Domain::DepthwiseFilter>>(perm));
return this;
}
ctx->stack()->push(encode_node);
- return stdex::make_unique<Return>(encode_node);
+ return std::make_unique<Return>(encode_node);
}
};
ctx->stack()->push(depthwiseconv2d_node);
- return stdex::make_unique<Return>(depthwiseconv2d_node);
+ return std::make_unique<Return>(depthwiseconv2d_node);
}
};
ctx->stack()->push(tr_conv2d_node);
- return stdex::make_unique<Return>(tr_conv2d_node);
+ return std::make_unique<Return>(tr_conv2d_node);
}
};
ctx->stack()->push(reshape_node);
- return stdex::make_unique<Return>(reshape_node);
+ return std::make_unique<Return>(reshape_node);
}
};
broadcast_node->input(ctx->stack()->pop());
ctx->stack()->push(broadcast_node);
- return stdex::make_unique<Return>(broadcast_node);
+ return std::make_unique<Return>(broadcast_node);
}
};
#include "GraphBuilder.h"
-#include <stdex/Memory.h>
-
enum class GraphCode
{
Identity,
const_node = graph_builder->push<ConstGenLayer>()->node();
filter_encode_node =
- graph_builder->push<DepthwiseFilterEncodeLayer>()->perm(filter_perm)->node();
+ graph_builder->push<DepthwiseFilterEncodeLayer>()->perm(filter_perm)->node();
depthwiseconv2d_node = graph_builder->push<DepthwiseConv2DLayer>()->node();
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(TestDialect<2, 3>::get(), &t23_rule)
- .bind(TestDialect<4, 5>::get(), &t45_rule);
+ .bind(TestDialect<2, 3>::get(), &t23_rule)
+ .bind(TestDialect<4, 5>::get(), &t45_rule);
loco::apply(&rules).to(g.get());
#include "loco/IR/Algorithm.h"
#include <cassert>
-
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
{
if (_rule->infer(node, shape))
{
- node->annot(stdex::make_unique<ShapeAnnotation>(shape));
+ node->annot(std::make_unique<ShapeAnnotation>(shape));
changed = true;
}
}
#include "loco/IR/Algorithm.h"
#include <cassert>
-
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
{
if (_rule->infer(node, dtype))
{
- node->annot(stdex::make_unique<DataTypeAnnotation>(dtype));
+ node->annot(std::make_unique<DataTypeAnnotation>(dtype));
changed = true;
}
}
loco::MultiDialectTypeInferenceRule rules;
rules.bind(TestDialect<loco::DataType::S8>::get(), &s8_rule)
- .bind(TestDialect<loco::DataType::U8>::get(), &u8_rule)
- .bind(loco::CanonicalDialect::get(), &canon_rule);
+ .bind(TestDialect<loco::DataType::U8>::get(), &u8_rule)
+ .bind(loco::CanonicalDialect::get(), &canon_rule);
loco::apply(&rules).to(g.get());
#include <gtest/gtest.h>
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
return HWIO;
}
-} // nemaspace
+} // namespace
#if 0
>>> MaxPool_Float_000 testcase
add_library(locoex_customop SHARED ${SOURCES})
target_include_directories(locoex_customop PUBLIC include)
target_link_libraries(locoex_customop PUBLIC loco)
-target_link_libraries(locoex_customop PRIVATE stdex locop pepper_str)
+target_link_libraries(locoex_customop PRIVATE locop pepper_str)
install(TARGETS locoex_customop DESTINATION lib)
if(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
GTest_AddTest(locoex_customop_test ${TESTS})
-target_link_libraries(locoex_customop_test loco locoex_customop stdex)
+target_link_libraries(locoex_customop_test loco locoex_customop)
require("loco")
-require("stdex")
require("locop")
require("pepper-str")
#define INSTANTIATE(AT) \
template const typename AttrTypeTrait<AT>::Type *COpCall::attr<AT>(const std::string &attr_name) \
- const;
+ const;
INSTANTIATE(COpAttrType::Float)
INSTANTIATE(COpAttrType::Int)
#include <loco/IR/Graph.h>
#include <loco/IR/Nodes.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
custom->input(0, inp);
custom->input(1, inp);
- custom->attr(int_attr, stdex::make_unique<COpAttrInt>(int_val));
- custom->attr(float_attr, stdex::make_unique<COpAttrFloat>(float_val));
+ custom->attr(int_attr, std::make_unique<COpAttrInt>(int_val));
+ custom->attr(float_attr, std::make_unique<COpAttrFloat>(float_val));
}
// access custom op input
public:
BinaryInputNode() : TestNode(2) {}
};
-}
+} // namespace
TEST(CustomOpTest, VariadicArityNode_arity_0)
{
target_include_directories(locomotiv PRIVATE src)
target_link_libraries(locomotiv PUBLIC loco)
target_link_libraries(locomotiv PUBLIC angkor)
-target_link_libraries(locomotiv PRIVATE stdex)
# Let's apply nncc common compile options
#
# NOTE This will enable strict compilation (warnings as error).
* @warn This approach may fail in case of graph with control flow
*/
Session(loco::Graph *g, const std::vector<loco::Node *> &custom_outputs)
- : _graph(g), _outputs(custom_outputs)
+ : _graph(g), _outputs(custom_outputs)
{
// DO NOTHING
}
require("angkor")
-require("stdex")
const uint32_t pad_right = avgpool2d->pad()->right();
const uint32_t output_height =
- compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
+ compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
const uint32_t output_width =
- compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
+ compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
// prepare output buffer
Shape output_shape{batches, output_height, output_width, depth};
ASSERT_TRUE(*(avgpool2d_data->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
validate(input_data && bias_data, "Input not ready");
validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Tensor &&
- locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
+ locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
"Wrong input domain");
std::unique_ptr<NodeData> bias_add_data = calc(input_data, bias_data, bias_add->axis());
validate(input_data && bias_data, "Input not ready");
validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Feature &&
- locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
+ locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
"Wrong input domain");
std::unique_ptr<NodeData> bias_add_data = calc(input_data, bias_data, 3);
const uint32_t pad_right = conv2d->pad()->right();
const uint32_t output_height =
- compute_out_size(input_height + pad_top + pad_bottom, filter_height, stride_height);
+ compute_out_size(input_height + pad_top + pad_bottom, filter_height, stride_height);
const uint32_t output_width =
- compute_out_size(input_width + pad_left + pad_right, filter_width, stride_width);
+ compute_out_size(input_width + pad_left + pad_right, filter_width, stride_width);
const uint32_t batches = input_shape.dim(0);
const uint32_t input_depth = input_shape.dim(3);
((unsigned)in_y < input_height))
{
auto input_value =
- input_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, in_channel}));
+ input_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, in_channel}));
auto filter_value =
- filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+ filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
total += (input_value * filter_value);
}
}
ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
const uint32_t pad_right = dw_conv2d->pad()->right();
const uint32_t ofm_height =
- compute_out_size(ifm_height, pad_top + pad_bottom, ker_height, stride_height);
+ compute_out_size(ifm_height, pad_top + pad_bottom, ker_height, stride_height);
const uint32_t ofm_width =
- compute_out_size(ifm_width, pad_left + pad_right, ker_width, stride_width);
+ compute_out_size(ifm_width, pad_left + pad_right, ker_width, stride_width);
const uint32_t batches = ifm_shape.dim(0);
const uint32_t ifm_depth = ifm_shape.dim(3);
ASSERT_TRUE(*(dw_conv2d_result->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
// Make HWCM (i.e. height, width, depth, multiplier) buffer from DepthwiseFilterShape
Buffer<T> node_buf = make_buffer<T, LexicalLayout>(
- Shape{node_shape.height().value(), node_shape.width().value(), node_shape.depth().value(),
- node_shape.multiplier().value()});
+ Shape{node_shape.height().value(), node_shape.width().value(), node_shape.depth().value(),
+ node_shape.multiplier().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
// Encoder to correctly read input tensor as MHWC
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>(
- new loco::PermutingEncoder<loco::Domain::DepthwiseFilter>);
+ new loco::PermutingEncoder<loco::Domain::DepthwiseFilter>);
encoder->perm()->axis(loco::DepthwiseFilterAxis::Multiplier) = 0;
encoder->perm()->axis(loco::DepthwiseFilterAxis::Height) = 1;
encoder->perm()->axis(loco::DepthwiseFilterAxis::Width) = 2;
const loco::Permutation<loco::Domain::Feature> &perm)
{
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Feature>>(
- new loco::PermutingEncoder<loco::Domain::Feature>);
+ new loco::PermutingEncoder<loco::Domain::Feature>);
encoder->perm(perm);
const loco::Permutation<loco::Domain::Feature> &perm)
{
auto decoder = std::unique_ptr<loco::PermutingDecoder<loco::Domain::Feature>>(
- new loco::PermutingDecoder<loco::Domain::Feature>);
+ new loco::PermutingDecoder<loco::Domain::Feature>);
decoder->perm(perm);
// Make tensor buffer from TensorShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value(),
- node_shape.dim(2).value(), node_shape.dim(3).value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value(),
+ node_shape.dim(2).value(), node_shape.dim(3).value()});
// Copy buffer in an order arranged by decoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
// Make NHWC buffer from FeatureShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
- node_shape.width().value(), node_shape.depth().value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
+ node_shape.width().value(), node_shape.depth().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
// Make NHWC buffer from FilterShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
- node_shape.width().value(), node_shape.depth().value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
+ node_shape.width().value(), node_shape.depth().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
// Encoder to correctly read input tensor as NCHW
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Filter>>(
- new loco::PermutingEncoder<loco::Domain::Filter>);
+ new loco::PermutingEncoder<loco::Domain::Filter>);
encoder->perm()->axis(loco::FilterAxis::Count) = 0;
encoder->perm()->axis(loco::FilterAxis::Depth) = 1;
encoder->perm()->axis(loco::FilterAxis::Height) = 2;
// Encoder to correctly read input tensor as CHNW
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Filter>>(
- new loco::PermutingEncoder<loco::Domain::Filter>);
+ new loco::PermutingEncoder<loco::Domain::Filter>);
encoder->perm()->axis(loco::FilterAxis::Depth) = 0;
encoder->perm()->axis(loco::FilterAxis::Height) = 1;
encoder->perm()->axis(loco::FilterAxis::Count) = 2;
const loco::Permutation<loco::Domain::Matrix> &perm)
{
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Matrix>>(
- new loco::PermutingEncoder<loco::Domain::Matrix>);
+ new loco::PermutingEncoder<loco::Domain::Matrix>);
encoder->perm(perm);
const loco::Permutation<loco::Domain::Matrix> &perm)
{
auto decoder = std::unique_ptr<loco::PermutingDecoder<loco::Domain::Matrix>>(
- new loco::PermutingDecoder<loco::Domain::Matrix>);
+ new loco::PermutingDecoder<loco::Domain::Matrix>);
decoder->perm(perm);
// Make tensor buffer from TensorShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value()});
// Copy buffer in an order arranged by decoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
// Make HW buffer from MatrixShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.height().value(), node_shape.width().value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.height().value(), node_shape.width().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
const uint32_t pad_right = maxpool2d->pad()->right();
const uint32_t output_height =
- compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
+ compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
const uint32_t output_width =
- compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
+ compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
// prepare output buffer
Shape output_shape{batches, output_height, output_width, depth};
ASSERT_TRUE(*(maxpool2d_data->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
validate(lhs_data->dtype() == rhs_data->dtype(), "lhs and rhs of Concat should have same dtype");
validate(annot_domain(tensor_concat->lhs()) == loco::Domain::Tensor &&
- annot_domain(tensor_concat->rhs()) == loco::Domain::Tensor,
+ annot_domain(tensor_concat->rhs()) == loco::Domain::Tensor,
"Some ingredients of TensorConcat is not Tensor");
// Calculate output shape
locomotiv::validate(input_shape.rank() == 4, "ifm rank must be 4");
locomotiv::validate(filter_shape.rank() == 4, "filter rank must be 4");
locomotiv::validate(input_shape.dim(3) /* depth of input */ ==
- filter_shape.dim(3) /* depth of filter */,
+ filter_shape.dim(3) /* depth of filter */,
"channel value mismatch");
const uint32_t input_height = input_shape.dim(1);
// TODO Support dilations
const uint32_t output_height =
- compute_transposed_out_size(input_height, pad_top + pad_bottom, filter_height, stride_height);
+ compute_transposed_out_size(input_height, pad_top + pad_bottom, filter_height, stride_height);
const uint32_t output_width =
- compute_transposed_out_size(input_width, pad_left + pad_right, filter_width, stride_width);
+ compute_transposed_out_size(input_width, pad_left + pad_right, filter_width, stride_width);
const uint32_t batches = input_shape.dim(0);
const uint32_t input_depth = input_shape.dim(3);
{
auto input_value = input_buf->at(Index({batch, in_y, in_x, in_channel}));
auto filter_value =
- filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+ filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
output_buf.at(Index({batch, (unsigned)out_y, (unsigned)out_x, out_channel})) +=
- input_value * filter_value;
+ input_value * filter_value;
}
}
}
ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
#include "NodeDataImpl.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
void annot_data(loco::Node *node, std::unique_ptr<NodeData> &&data)
{
- node->annot(stdex::make_unique<NodeDataAnnotation>(std::move(data)));
+ node->annot(std::make_unique<NodeDataAnnotation>(std::move(data)));
}
const NodeData *annot_data(const loco::Node *node)
return dynamic_cast<Derived *>(node);
}
-// clang-format off
+ // clang-format off
/**
* @brief Calculate for one specified node and update its result as NodeData.
* Abort program when its ingredients are not ready or not supported.
#include "UserData.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
void user_data(loco::Node *node, std::unique_ptr<NodeData> &&data)
{
- node->annot(stdex::make_unique<UserDataAnnotation>(std::move(data)));
+ node->annot(std::make_unique<UserDataAnnotation>(std::move(data)));
}
void erase_user_data(loco::Node *node) { node->annot<UserDataAnnotation>(nullptr); }
target_link_libraries(locop PRIVATE nncc_common)
target_link_libraries(locop PUBLIC nncc_coverage)
target_link_libraries(locop PRIVATE pp)
-target_link_libraries(locop PRIVATE stdex)
if(NOT ENABLE_TEST)
return()
nnas_find_package(GTest REQUIRED)
GTest_AddTest(locop_test ${TESTS})
-target_link_libraries(locop_test stdex)
target_link_libraries(locop_test locop)
#include <pp/Format.h>
-#include <stdex/Memory.h>
-
#include <map>
#include <set>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
push->from(pull);
- auto res = stdex::make_unique<Bundle<PullPush>>();
+ auto res = std::make_unique<Bundle<PullPush>>();
res->g = std::move(g);
res->pull = pull;
#include <pp/Format.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <set>
else
{
// Use Built-in NodeSummaryBuilder otherwise
- node_summary_builder = stdex::make_unique<GenericNodeSummaryBuilder>(&symbols);
+ node_summary_builder = std::make_unique<GenericNodeSummaryBuilder>(&symbols);
}
// Print Graph Input(s)
#include "locop/FormattedGraph.h"
#include "ExampleGraph.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
auto bundle = make_bundle<PullPush>();
auto g = bundle->graph();
{
- bundle->push->annot(stdex::make_unique<MyAnnotation>());
+ bundle->push->annot(std::make_unique<MyAnnotation>());
}
struct MyBuilder final : public locop::NodeSummaryBuilder
{
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *) const final
{
- return stdex::make_unique<MyBuilder>();
+ return std::make_unique<MyBuilder>();
}
};
- std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MyFactory>()) << std::endl;
+ std::cout << locop::fmt<locop::LinearV1>(g).with(std::make_unique<MyFactory>()) << std::endl;
// TODO Check whether MyBuilder actually sees all the nodes in a graph
SUCCEED();
{
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tbl) const final
{
- return stdex::make_unique<CompositeBuilder>(tbl);
+ return std::make_unique<CompositeBuilder>(tbl);
}
};
- std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MyFactory>()) << std::endl;
+ std::cout << locop::fmt<locop::LinearV1>(g).with(std::make_unique<MyFactory>()) << std::endl;
// TODO Check whether MyBuilder actually sees all the nodes in a graph
SUCCEED();
return os;
}
-} // namespace
+} // namespace loco
namespace locop
{
#include "locop/FormattedTensorShape.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
TEST(FormattedTensorShapeTest, BracketFormat)
{
- auto tensor_shape = stdex::make_unique<loco::TensorShape>();
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
tensor_shape->rank(2);
tensor_shape->dim(0) = 4;
+ tensor_shape->dim(1) = 8;
std::cout << fmt<TensorShapeFormat::Bracket>(tensor_shape.get()) << std::endl;
SUCCEED();
}
+
+TEST(FormattedTensorShapeTest, PlainFormat)
+{
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
+
+ tensor_shape->rank(2);
+ tensor_shape->dim(0) = 4;
+ tensor_shape->dim(1) = 8;
+
+ std::cout << fmt<TensorShapeFormat::Plain>(tensor_shape.get()) << std::endl;
+
+ SUCCEED();
+}
#include "locop/GenericNodeSummaryBuilder.h"
#include "locop/FormattedGraph.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <stdexcept>
#include <gtest/gtest.h>
{
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tbl) const final
{
- return stdex::make_unique<locop::GenericNodeSummaryBuilder>(tbl);
+ return std::make_unique<locop::GenericNodeSummaryBuilder>(tbl);
}
};
g->nodes()->create<MockNode>();
- std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MockFactory>()) << std::endl;
+ std::cout << locop::fmt<locop::LinearV1>(g).with(std::make_unique<MockFactory>()) << std::endl;
SUCCEED();
}
#include "locop/NodeSummary.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace locop
return *_name;
}
-void NodeDesc::opname(const std::string &v) { _name = stdex::make_unique<std::string>(v); }
+void NodeDesc::opname(const std::string &v) { _name = std::make_unique<std::string>(v); }
-} // namespace loco
+} // namespace locop
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Phase.h>
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+struct Bumblebee final : public logo::Pass
+{
+ const char *name(void) const final { return "Bee"; }
+ bool run(loco::Graph *) final { return false; }
+};
+
+} // namespace
+
+TEST(LogoPhaseSaturateTests, simple)
+{
+ loco::Graph g;
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{&g};
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<Bumblebee>());
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
+
+TEST(LogoPhaseRestartTests, simple)
+{
+ loco::Graph g;
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{&g};
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<Bumblebee>());
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
target_link_libraries(logo PUBLIC loco)
target_link_libraries(logo PUBLIC logo_core)
target_link_libraries(logo PRIVATE locomotiv)
-target_link_libraries(logo PRIVATE stdex)
if(NOT ENABLE_TEST)
return()
GTest_AddTest(logo_test ${TESTS})
target_include_directories(logo_test PRIVATE src)
target_link_libraries(logo_test logo)
-target_link_libraries(logo_test stdex)
require("loco")
require("logo-core")
require("locomotiv")
-require("stdex")
#include <loco.h>
#include <loco/IR/CanonicalDialect.h>
-#include <stdex/Memory.h>
-
#include <locomotiv/Session.h>
#include <cassert>
bool skip(const loco::Node *node)
{
static std::set<uint32_t> skip_op = {
- // TODO Current implementation works for 'Tensor' domain only. Support other domains such as
- // `Feature`, `Filter`, `Bias`, etc.
- static_cast<uint32_t>(loco::CanonicalOpcode::FilterEncode),
- static_cast<uint32_t>(loco::CanonicalOpcode::FeatureEncode),
- static_cast<uint32_t>(loco::CanonicalOpcode::BiasEncode),
- static_cast<uint32_t>(loco::CanonicalOpcode::DepthwiseFilterEncode),
-
- // We don't perform constant folding for Push
- static_cast<uint32_t>(loco::CanonicalOpcode::Push),
-
- // TensorBroadcast is a good hint for optimization
- // TODO Let this option be controlled by driver using logo
- static_cast<uint32_t>(loco::CanonicalOpcode::TensorBroadcast),
+ // TODO Current implementation works for 'Tensor' domain only. Support other domains such as
+ // `Feature`, `Filter`, `Bias`, etc.
+ static_cast<uint32_t>(loco::CanonicalOpcode::FilterEncode),
+ static_cast<uint32_t>(loco::CanonicalOpcode::FeatureEncode),
+ static_cast<uint32_t>(loco::CanonicalOpcode::BiasEncode),
+ static_cast<uint32_t>(loco::CanonicalOpcode::DepthwiseFilterEncode),
+
+ // We don't perform constant folding for Push
+ static_cast<uint32_t>(loco::CanonicalOpcode::Push),
+
+ // TensorBroadcast is a good hint for optimization
+ // TODO Let this option be controlled by driver using logo
+ static_cast<uint32_t>(loco::CanonicalOpcode::TensorBroadcast),
};
if (node->dialect() == loco::CanonicalDialect::get())
using namespace logo::test;
+TEST(ConstantFoldingTest, name)
+{
+ logo::ConstantFoldingPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ConstantFoldingTest, run_NEG)
+{
+ loco::Graph g;
+ logo::ConstantFoldingPass pass;
+
+ ASSERT_FALSE(pass.run(&g));
+}
+
namespace
{
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_EMPTY_TEST_GRAPH_H__
+#define __LOGO_EMPTY_TEST_GRAPH_H__
+
+#include <loco.h>
+
+namespace logo
+{
+
+void create_empty_test_net(loco::Graph *graph);
+
+} // namespace logo
+
+#endif // __LOGO_EMPTY_TEST_GRAPH_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+#include <cassert>
+
+namespace logo
+{
+
+void create_empty_test_net(loco::Graph *graph)
+{
+ assert(graph);
+
+ auto const_node = graph->nodes()->create<loco::ConstGen>();
+ {
+ const_node->dtype(loco::DataType::FLOAT32);
+ const_node->rank(1);
+ const_node->dim(0) = 1;
+ const_node->size<loco::DataType::FLOAT32>(1);
+ const_node->at<loco::DataType::FLOAT32>(0) = 1.0f;
+ }
+
+ auto push_node = graph->nodes()->create<loco::Push>();
+ {
+ push_node->from(const_node);
+ }
+
+ auto graph_output = graph->outputs()->create();
+ {
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ loco::link(graph_output, push_node);
+ }
+}
+
+} // namespace logo
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveDeadNodePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(RemoveDeadNodePassTest, name)
+{
+ logo::RemoveDeadNodePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveDeadNodePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::RemoveDeadNodePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveDeadNodeWithQueryPass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(RemoveDeadNodeWithQueryPassTest, name)
+{
+ logo::RemoveDeadNodeWithQueryPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveDeadNodeWithQueryPassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::RemoveDeadNodeWithQueryPass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveForwardNodePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(RemoveForwardNodePassTest, name)
+{
+ logo::RemoveForwardNodePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveForwardNodePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::RemoveForwardNodePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ReorderDecodePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(ReorderDecodePassTest, TensorBiasAdd_name)
+{
+ logo::ReorderDecodePass<loco::TensorBiasAdd> pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ReorderDecodePassTest, ReLU_name)
+{
+ logo::ReorderDecodePass<loco::ReLU> pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ReorderDecodePassTest, TensorBiasAdd_run_NEG)
+{
+ loco::Graph g;
+ logo::ReorderDecodePass<loco::TensorBiasAdd> pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
+
+TEST(ReorderDecodePassTest, ReLU_run_NEG)
+{
+ loco::Graph g;
+ logo::ReorderDecodePass<loco::ReLU> pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ResolveDuplicateReshapePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveDuplicateReshapePassTest, name)
+{
+ logo::ResolveDuplicateReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ResolveDuplicateReshapePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::ResolveDuplicateReshapePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ResolveRedundantReshapePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveRedundantReshapePassTest, name)
+{
+ logo::ResolveRedundantReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ResolveRedundantReshapePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::ResolveRedundantReshapePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
#include <loco/IR/CanonicalDialect.h>
#include <loco/IR/CanonicalNode.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <set>
#include <vector>
#include <cassert>
perm_vec[to] = from;
}
- transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
- encode_node, decode_node, encode_node->input(), perm_vec));
+ transposeCandidates.insert(
+ std::make_unique<TransposeCtx>(encode_node, decode_node, encode_node->input(), perm_vec));
}
}
perm_vec[to] = from;
}
- transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
- encode_node, decode_node, encode_node->input(), perm_vec));
+ transposeCandidates.insert(
+ std::make_unique<TransposeCtx>(encode_node, decode_node, encode_node->input(), perm_vec));
}
}
perm_vec[to] = from;
}
- transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
- encode_node, decode_node, encode_node->input(), perm_vec));
+ transposeCandidates.insert(
+ std::make_unique<TransposeCtx>(encode_node, decode_node, encode_node->input(), perm_vec));
}
}
TransposeCtx(loco::Node *first, loco::Node *last, loco::Node *input,
std::vector<loco::TensorAxis> perm)
- : first_node(first), last_node(last), input_node(input), perm_vec(perm)
+ : first_node(first), last_node(last), input_node(input), perm_vec(perm)
{ /* empty */
}
};
#include "TestHelper.h"
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
+TEST(SimplifyDomainConversionPassTest, name)
+{
+ logo::SimplifyDomainConversionPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(SimplifyDomainConversionPassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::SimplifyDomainConversionPass pass;
+
+ ASSERT_FALSE(pass.run(&g));
+}
+
namespace
{
{
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
decoder->perm(perm<T>());
{
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
encoder->perm(perm<T>());
--- /dev/null
+set(SRCS_EVAL_TESTER
+ src/EvalDriver.cpp
+ )
+
+add_executable(luci_eval_driver ${SRCS_EVAL_TESTER})
+target_link_libraries(luci_eval_driver PRIVATE oops)
+target_link_libraries(luci_eval_driver PRIVATE loco)
+target_link_libraries(luci_eval_driver PRIVATE luci_import)
+target_link_libraries(luci_eval_driver PRIVATE luci_export)
+target_link_libraries(luci_eval_driver PRIVATE luci_lang)
+target_link_libraries(luci_eval_driver PRIVATE luci_interpreter)
+target_link_libraries(luci_eval_driver PRIVATE safemain)
--- /dev/null
+require("oops")
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("safemain")
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Importer.h>
+#include <luci_interpreter/Interpreter.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+#include <string>
+
+namespace
+{
+
+void readDataFromFile(const std::string &filename, char *data, size_t data_size)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.read(data, data_size).fail())
+ throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+}
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+std::unique_ptr<luci::Module> importModel(const std::string &filename)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ {
+ throw std::runtime_error("Cannot open model file \"" + filename + "\".\n");
+ }
+ std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
+ std::istreambuf_iterator<char>());
+ return luci::Importer().importModule(circle::GetModel(model_data.data()));
+}
+
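+// Computes the tensor's data size in bytes: the element size of its dtype
+// (via loco::size) multiplied by every dimension.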
+template <typename NodeT> size_t getTensorSize(const NodeT *node)
+{
+ uint32_t tensor_size = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ tensor_size *= node->dim(i).value();
+ return tensor_size;
+}
+
+} // namespace
+
+/*
+ * @brief EvalDriver main
+ *
+ * Driver for testing luci-interpreter
+ *
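+ * Example invocation (input/output file names are illustrative):
+ *   luci_eval_driver Add.circle 1 Add.circle.input Add.circle.output
+ *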
+ */
+int entry(int argc, char **argv)
+{
+ if (argc != 5)
+ {
+ std::cerr
+ << "Usage: " << argv[0]
+ << " <path/to/circle/model> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
+ return EXIT_FAILURE;
+ }
+
+ const char *filename = argv[1];
+ const int32_t num_inputs = atoi(argv[2]);
+ const char *input_prefix = argv[3];
+ const char *output_file = argv[4];
+
+ // Load model from the file
+ std::unique_ptr<luci::Module> module = importModel(filename);
+ if (module == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // Create interpreter.
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // Set input.
+ // Data for n'th input is read from ${input_prefix}n
+ // (ex: Add.circle.input0, Add.circle.input1 ..)
+ const auto input_nodes = loco::input_nodes(module->graph());
+ assert(num_inputs == input_nodes.size());
+ for (int32_t i = 0; i < num_inputs; i++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+ std::vector<char> input_data(getTensorSize(input_node));
+ readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
+ input_data.size());
+ interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ // Do inference.
+ interpreter.interpret();
+
+ // Get output.
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (int i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ std::vector<char> output_data(getTensorSize(output_node));
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+    // Output data is written to ${output_file}i
+    // (ex: Add.circle.output0)
+    // Output shape is written to ${output_file}i.shape
+    // (ex: Add.circle.output0.shape)
+ writeDataToFile(std::string(output_file) + std::to_string(i), output_data.data(),
+ output_data.size());
+    // In case the output tensor is a scalar value:
+    // an output tensor with rank 0 is treated as a scalar with shape (1)
+ if (output_node->rank() == 0)
+ {
+ writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", "1", 1);
+ }
+ else
+ {
+ auto shape_str = std::to_string(output_node->dim(0).value());
+ for (int j = 1; j < output_node->rank(); j++)
+ {
+ shape_str += ",";
+ shape_str += std::to_string(output_node->dim(j).value());
+ }
+ writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", shape_str.c_str(),
+ shape_str.size());
+ }
+ }
+ return EXIT_SUCCESS;
+}
public:
EventNotifierImpl(const RuntimeToIR &runtime_to_ir,
const std::vector<ExecutionObserver *> &observers)
- : _runtime_to_ir(runtime_to_ir), _observers(observers)
+ : _runtime_to_ir(runtime_to_ir), _observers(observers)
{
}
{
protected:
Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs)
- : _inputs(std::move(inputs)), _outputs(std::move(outputs))
+ : _inputs(std::move(inputs)), _outputs(std::move(outputs))
{
}
protected:
KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
const Params ¶ms)
- : Kernel(std::move(inputs), std::move(outputs)), _params(params)
+ : Kernel(std::move(inputs), std::move(outputs)), _params(params)
{
}
struct ConcatenationParams
{
int axis;
+ Activation activation;
};
struct Conv2DParams
Activation activation;
};
+struct PackParams
+{
+ int32_t values_count;
+ int32_t axis;
+};
+
struct Pool2DParams
{
Padding padding;
}
RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module)
- : _owning_module(owning_module), _tensor_alloc_plan(std::make_unique<TensorAllocPlan>())
+ : _owning_module(owning_module), _tensor_alloc_plan(std::make_unique<TensorAllocPlan>())
{
}
Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization,
std::string name)
- : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
- _name(std::move(name)), _data_allocated(false)
+ : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
+ _name(std::move(name)), _data_allocated(false)
{
}
{
Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams ¶ms)
- : KernelWithParams<AddParams>({input1, input2}, {output}, params)
+ : KernelWithParams<AddParams>({input1, input2}, {output}, params)
{
}
params.float_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), ¶ms);
+ getTensorShape(input1()), getTensorShape(input2()), ¶ms);
if (need_broadcast)
{
tflite::reference_ops::BroadcastAdd4DSlow(
- params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
- getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
params.quantized_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), ¶ms);
+ getTensorShape(input1()), getTensorShape(input2()), ¶ms);
if (need_broadcast)
{
tflite::reference_ops::BroadcastAdd4DSlow(
- params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
- getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
else
{
const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift;
const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift;
const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input1_val, input1_multiplier, input1_shift);
+ shifted_input1_val, input1_multiplier, input1_shift);
const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input2_val, input2_multiplier, input2_shift);
+ shifted_input2_val, input2_multiplier, input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
- raw_sum, output_multiplier, output_shift);
+ raw_sum, output_multiplier, output_shift);
const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
return static_cast<int16_t>(clamped_output);
};
std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::initializer_list<int32_t> test_shapes[] = {
- {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::initializer_list<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
std::initializer_list<int32_t> output_shapes[] = {
- {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
std::vector<std::vector<float>> output_data = {
- {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
- 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f,
- -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
- {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f},
- {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
- 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f,
- -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
- {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}};
+ {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f,
+ -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
+ {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f},
+ {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f,
+ -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
+ {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}};
float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
for (int i = 0; i < output_data.size(); i++)
{
Tensor input1_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
quant_param.second, test_data);
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
AddParams params{};
params.activation = Activation::NONE;
Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
quant_param.second, test_data);
Tensor input2_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
AddParams params{};
params.activation = Activation::NONE;
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::vector<std::vector<float>> test_outputs = {
- {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
- 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
- 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
- {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
- {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
- 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
- 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
- {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
- << "With shape number " << i;
+ << "With shape number " << i;
}
// Re-run with exchanged inputs.
for (size_t i = 0; i < test_shapes.size(); ++i)
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
- << "With shape number " << i;
+ << "With shape number " << i;
}
}
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::vector<std::vector<int32_t>> ref_output_shapes{
- {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
std::vector<std::vector<float>> ref_outputs = {
- {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
- 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
- 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
- {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
- {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
- 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
- 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
- {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
for (size_t i = 0; i < test_shapes.size(); ++i)
{
Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data);
Tensor input2_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
+ makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
const float tolerance = output_tensor.scale();
EXPECT_THAT(extractTensorShape(output_tensor),
::testing::ElementsAreArray(ref_output_shapes[i]))
- << "With shape number " << i;
+ << "With shape number " << i;
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
- << "With shape number " << i;
+ << "With shape number " << i;
}
// Re-run with exchanged inputs and different scales.
for (size_t i = 0; i < test_shapes.size(); ++i)
{
Tensor input1_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
+ makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0);
const float tolerance = output_tensor.scale();
EXPECT_THAT(extractTensorShape(output_tensor),
::testing::ElementsAreArray(ref_output_shapes[i]))
- << "With shape number " << i;
+ << "With shape number " << i;
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
- << "With shape number " << i;
+ << "With shape number " << i;
}
}
{
ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams ¶ms)
- : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
+ : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
{
}
void ArgMax::execute() const
{
-#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
- tflite::optimized_ops::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
- getTensorData<axis_type>(axis()), getTensorShape(output()), \
- getTensorData<output_type>(output()), \
- std::greater<data_type>())
+#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
+ tflite::optimized_ops::ArgMinMax( \
+ getTensorShape(input()), getTensorData<data_type>(input()), getTensorData<axis_type>(axis()), \
+ getTensorShape(output()), getTensorData<output_type>(output()), std::greater<data_type>())
if (axis()->element_type() == DataType::S32)
{
switch (_params.output_type)
/*output_shape=*/{1, 1, 1},
/*input_data=*/
{
- 1, 9, 7, 3,
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{1});
Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
/*output_shape=*/{1, 1, 1},
/*input_data=*/
{
- 1, 9, 7, 3,
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{1});
}
/*output_shape=*/{1, 1, 2},
/*input_data=*/
{
- 1, 2, 7, 8, 1, 9, 7, 3,
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{3, 1});
Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
/*output_shape=*/{1, 1, 2},
/*input_data=*/
{
- 1, 2, 7, 8, 1, 9, 7, 3,
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{3, 1});
}
TEST(ArgMaxTest, UnsupportedType_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
- 1, 2, 7, 8, 1, 9, 7, 3,
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
});
Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3});
Tensor output_tensor = makeOutputTensor(DataType::U8);
{
AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
{
}
const int32_t input_width = input_shape.dim(2);
const int32_t depth = input_shape.dim(3);
- const int32_t output_height = computeOutputSize(_params.padding, input_height,
- _params.filter_height, _params.stride_height);
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
_padding_height =
- computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
_padding_width =
- computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
if (input()->element_type() == DataType::U8)
{
LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
params.quantized_activation_max = activation_max;
tflite::reference_integer_ops::AveragePool(
- params, getTensorShape(input()), getTensorData<int16_t>(input()), //
- getTensorShape(output()), getTensorData<int16_t>(output()));
+ params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+ getTensorShape(output()), getTensorData<int16_t>(output()));
}
} // namespace kernels
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
kernel.execute();
std::vector<float> ref_output_data{
- 0, 1.5, //
- 4.5, 6, //
+ 0, 1.5, //
+ 4.5, 6, //
};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
TEST(AveragePool2DTest, Uint8_0)
{
std::vector<float> input_data{
- 0, -6, 12, 4, //
- -3, -2, 10, 7, //
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Pool2DParams params{};
TEST(AveragePool2DTest, Uint8_1)
{
std::vector<float> input_data{
- 0, 6, 12, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 12, 4, //
+ 3, 2, 10, 7, //
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Pool2DParams params{};
Shape input_shape{1, 3, 5, 1};
std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
std::vector<float> ref_output_data{
- 0, 1.5, //
- 4.5, 6, //
+ 0, 1.5, //
+ 4.5, 6, //
};
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
{
Shape input_shape{1, 3, 5};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8);
TEST(AveragePool2DTest, Quant_Param_NEG)
{
std::vector<float> input_data{
- 0, -6, 12, 4, //
- -3, -2, 10, 7, //
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
};
std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+} // namespace
+
+BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output)
+ : Kernel({input, block_shape, crops}, {output})
+{
+}
+
+void BatchToSpaceND::configure()
+{
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *crops_data = crops()->data<int32_t>();
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
+ for (int i = 0; i < spatial_dims_num * 2; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
+ }
+
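+  // Output shape per BatchToSpaceND semantics: each spatial dim is multiplied by its
+  // block factor and reduced by its crops, while the batch dim is divided by the
+  // product of the block factors (e.g. input {4, 2, 2, 1} with block_shape {2, 2}
+  // and zero crops yields an output of {1, 4, 4, 1}).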
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
+ output_batch_size = output_batch_size / block_shape_data[i];
+ output_shape.dim(i + 1) =
+ input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
+ }
+
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+ output()->resize(output_shape);
+}
+
+void BatchToSpaceND::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::optimized_ops::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::optimized_ops::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchToSpaceND : public Kernel
+{
+public:
+ BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *crops() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> crops_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
+{
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
+ Tensor crops_tensor = makeInputTensor<DataType::S32>(crops_shape, crops_data);
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class BatchToSpaceNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(BatchToSpaceNDTest, DataTypes);
+
+TYPED_TEST(BatchToSpaceNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{4, 2, 2, 1}, /*block_shape_shape=*/{2}, /*crops_shape=*/{2, 2},
+ /*output_shape=*/{1, 4, 4, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*block_shape_data=*/{2, 2}, /*crops_data=*/{0, 0, 0, 0},
+ /*output_data=*/{1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16});
+}
+
+TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(BatchToSpaceNDTest, Invalid_Crops_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
if (unextended_input1_shape == unextended_input2_shape)
{
const int flat_size = tflite::MatchingElementsSize(
- unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
+ unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
for (int i = 0; i < flat_size; ++i)
{
output_data[i] = op(input1_data[i], input2_data[i]);
auto fn = [&](int indexes[N]) {
output_data[SubscriptToIndex(output_desc, indexes)] =
- op(input1_data[SubscriptToIndex(desc1, indexes)],
- input2_data[SubscriptToIndex(desc2, indexes)]);
+ op(input1_data[SubscriptToIndex(desc1, indexes)],
+ input2_data[SubscriptToIndex(desc2, indexes)]);
};
tflite::NDOpsHelper<N>(output_desc, fn);
}
find_package(Threads REQUIRED)
-nnas_find_package(GTest REQUIRED)
set(SOURCES
Add.h
ArgMax.cpp
AveragePool2D.h
AveragePool2D.cpp
+ BatchToSpaceND.h
+ BatchToSpaceND.cpp
Concatenation.h
Concatenation.cpp
Conv2D.h
Minimum.cpp
Mul.h
Mul.cpp
+ Neg.h
+ Neg.cpp
NotEqual.h
NotEqual.cpp
+ Pack.h
+ Pack.cpp
Pad.h
Pad.cpp
Pow.h
Slice.cpp
Softmax.h
Softmax.cpp
+ SpaceToBatchND.h
+ SpaceToBatchND.cpp
SpaceToDepth.h
SpaceToDepth.cpp
Split.h
StridedSlice.cpp
Sqrt.h
Sqrt.cpp
+ SquaredDifference.h
+ SquaredDifference.cpp
Squeeze.h
Squeeze.cpp
Sub.h
PUBLIC luci_interpreter_core
PRIVATE nncc_common Threads::Threads)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
set(TEST_SOURCES
Add.test.cpp
ArgMax.test.cpp
AveragePool2D.test.cpp
+ BatchToSpaceND.test.cpp
Concatenation.test.cpp
Conv2D.test.cpp
DepthToSpace.test.cpp
Mean.test.cpp
Minimum.test.cpp
Mul.test.cpp
+ Neg.test.cpp
NotEqual.test.cpp
+ Pack.test.cpp
Pad.test.cpp
Pow.test.cpp
Prelu.test.cpp
Rsqrt.test.cpp
Slice.test.cpp
Softmax.test.cpp
+ SpaceToBatchND.test.cpp
SpaceToDepth.test.cpp
Split.test.cpp
StridedSlice.test.cpp
Sqrt.test.cpp
+ SquaredDifference.test.cpp
Squeeze.test.cpp
Sub.test.cpp
Tanh.test.cpp
Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
const ConcatenationParams ¶ms)
- : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
+ : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
{
}
LUCI_INTERPRETER_CHECK(num_inputs > 0);
const Tensor *t0 = _inputs[0];
+ // TODO: Support concat with fused activation function
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE);
+
int axis = _params.axis;
if (axis < 0)
axis += t0->shape().num_dims();
// Try different 'axis' and expect different results.
{
params.axis = 0;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
}
{
params.axis = -2; // Same as '0'.
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
}
{
params.axis = 1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
}
{
params.axis = -1; // Same as '1'.
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
ConcatenationParams params{};
params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
ConcatenationParams params{};
params.axis = -3;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
ConcatenationParams params{};
params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
ConcatenationParams params{};
params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
ConcatenationParams params{};
params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
ConcatenationParams params{};
params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// TODO: Remove this test when concat w/ fused_activation is supported
+TEST(ConcatenationTest, With_Fused_Activation_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = 1;
+ params.activation = luci::FusedActFunc::RELU;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
EXPECT_ANY_THROW(kernel.configure());
Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
const Conv2DParams ¶ms)
- : KernelWithParams<Conv2DParams>({input, filter, bias}, {output}, params)
+ : KernelWithParams<Conv2DParams>({input, filter, bias}, {output}, params)
{
}
bias()->shape().dim(0) == output_depth));
const int32_t output_height =
- computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
- _params.dilation_height_factor);
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
- _params.dilation_width_factor);
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
_padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
input_height, filter_height, output_height);
// Allocate tensor for Im2Col, if needed.
// The checks here should be aligned with the actual implementation.
const bool need_dilated_im2col =
- _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
+ _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 ||
filter_height != 1 || filter_width != 1;
const bool need_im2col =
- input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
+ input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
if (need_im2col)
{
const int input_depth = input_shape.dim(3);
try
{
_im2col =
- std::make_unique<Tensor>(input()->element_type(), im2col_shape, AffineQuantization{}, "");
+ std::make_unique<Tensor>(input()->element_type(), im2col_shape, AffineQuantization{}, "");
}
catch (std::bad_alloc &ba)
{
params.float_activation_max = activation_max;
if (_im2col)
- tflite::optimized_ops::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
- getTensorShape(filter()), getTensorData<float>(filter()),
- getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()),
- getTensorShape(_im2col.get()), getTensorData<float>(_im2col.get()));
- else
- tflite::reference_ops::Conv(
+ {
+ try
+ {
+ tflite::optimized_ops::Conv(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()), getTensorShape(_im2col.get()),
+ getTensorData<float>(_im2col.get()));
+ }
+ catch (std::bad_alloc &ba)
+ {
+        // Optimized Conv could not allocate memory: release the im2col buffer and
+        // fall back to the reference implementation, which does not need it.
+ _im2col->deallocate();
+
+ tflite::reference_ops::Conv(
params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), nullptr);
+ }
+ }
+ else
+ tflite::reference_ops::Conv(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), nullptr);
}
void Conv2D::evalQuantized() const
gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
tflite::optimized_ops::Conv(
- params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
- getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8_t>(output()), getTensorShape(_im2col.get()),
- getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get());
+ params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+ getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()), getTensorShape(_im2col.get()),
+ getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get());
}
void Conv2D::evalQuantizedPerChannel() const
calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
const std::vector<double> effective_output_scale =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
const std::vector<ChannelQuantMultipliers> multipliers_raw =
- quantizeMultipliers(effective_output_scale);
+ quantizeMultipliers(effective_output_scale);
BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw);
for (int32_t batch = 0; batch < batches; ++batch)
for (int32_t in_c = 0; in_c < input_depth; ++in_c)
{
const uint8_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
const uint8_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
acc += static_cast<int32_t>(input_val - input()->zero_point()) *
static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
}
}
int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
+ acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
scaled_acc += output()->zero_point();
scaled_acc = std::max(scaled_acc, activation_min);
calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
const std::vector<double> effective_output_scale =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
const std::vector<ChannelQuantMultipliers> multipliers_raw =
- quantizeMultipliers(effective_output_scale);
+ quantizeMultipliers(effective_output_scale);
BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw);
for (int32_t batch = 0; batch < batches; ++batch)
for (int32_t in_c = 0; in_c < input_depth; ++in_c)
{
const int16_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
const int16_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
}
}
}
int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
+ acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
scaled_acc = std::max(scaled_acc, activation_min);
scaled_acc = std::min(scaled_acc, activation_max);
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
kernel.execute();
std::vector<float> ref_output_data{
- 11, 16, 7, 20, // row = 0
- 0, 40, 0, 44, // row = 1
+ 11, 16, 7, 20, // row = 0
+ 0, 40, 0, 44, // row = 1
};
std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
Shape filter_shape{3, 2, 2, 1};
Shape bias_shape{3};
std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
};
std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
};
std::vector<float> bias_data{1, 2, 3};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
kernel.execute();
std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
};
std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
TEST(Conv2DTest, Uint8)
{
std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
};
std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
};
std::vector<float> bias_data{1, 2, 3};
Tensor filter_tensor = makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first,
input_quant_param.second, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S32>(
- {3}, input_quant_param.first * input_quant_param.first, 0, bias_data);
+ {3}, input_quant_param.first * input_quant_param.first, 0, bias_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
Conv2DParams params{};
params.padding = Padding::VALID;
kernel.execute();
std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
};
std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
{
const int output_channels = 3;
std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
};
std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
};
std::vector<float> bias_data{1, 2, 3};
Shape filter_shape{output_channels, 2, 2, 1};
Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first,
input_quant_param.second, input_data);
Tensor filter_tensor =
- makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 0, filter_data);
+ makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 0, filter_data);
Tensor bias_tensor =
- makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
+ makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
Conv2DParams params{};
params.padding = Padding::VALID;
kernel.execute();
std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
};
std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
std::vector<float> ref_output_data{
- 11, 16, 7, 20, // row = 0
- 0, 40, 0, 44, // row = 1
+ 11, 16, 7, 20, // row = 0
+ 0, 40, 0, 44, // row = 1
};
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data);
std::vector<int32_t> ref_output_shape{1, 2, 2, 3};
std::vector<float> input_data{
- 1, 2, // row = 0, col 0
- 3, 4, // row = 0, col 1
- 5, 6, // row = 1, col 0
- 7, 8, // row = 1, col 1
+ 1, 2, // row = 0, col 0
+ 3, 4, // row = 0, col 1
+ 5, 6, // row = 1, col 0
+ 7, 8, // row = 1, col 1
};
std::vector<float> filter_data{
- 4, -3, // out = 0
- 1, -3, // out = 1
- 5, -3, // out = 2
+ 4, -3, // out = 0
+ 1, -3, // out = 1
+ 5, -3, // out = 2
};
std::vector<float> bias_data{1, 10, 5};
std::vector<float> ref_output_data{
- 0, 5, 4, // row 0, col 0
- 1, 1, 8, // row 0, col 1
- 3, 0, 12, // row 1, col 0
- 5, 0, 16, // row 1, col 1
+ 0, 5, 4, // row 0, col 0
+ 1, 1, 8, // row 0, col 1
+ 3, 0, 12, // row 1, col 0
+ 5, 0, 16, // row 1, col 1
};
float input_scale = 0.25f;
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
Tensor filter_tensor =
- makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
+ makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<int32_t> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data);
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<uint8_t> bias_data{1, 2};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{3};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2, 3};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
{
DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
- : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+ : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
{
}
DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
Tensor *output, const DepthwiseConv2DParams &params)
- : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
+ : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
{
}
bias()->shape().dim(0) == channels_out));
const int32_t output_height =
- computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
- _params.dilation_height_factor);
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
- _params.dilation_width_factor);
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
_padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
input_height, filter_height, output_height);
params.float_activation_max = activation_max;
tflite::reference_ops::DepthwiseConv(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
}
void DepthwiseConv2D::evalQuantizedPerChannel() const
calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
const std::vector<double> effective_output_scales =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
- quantizeMultipliers(effective_output_scales);
+ quantizeMultipliers(effective_output_scales);
BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
for (int batch = 0; batch < batches; ++batch)
const int in_y = in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
- (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
if (is_point_inside_image)
{
int32 input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
int32 filter_val =
- filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
acc += (filter_val - filter()->zero_points()[output_channel]) *
(input_val - input()->zero_point());
}
int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
int output_shift = quant_multipliers[output_channel].shift;
int32_t scaled_acc =
- tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
scaled_acc += output()->zero_point();
scaled_acc = std::max(scaled_acc, activation_min);
scaled_acc = std::min(scaled_acc, activation_max);
output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
- static_cast<uint8_t>(scaled_acc);
+ static_cast<uint8_t>(scaled_acc);
}
}
}
params.quantized_activation_max = activation_max;
tflite::reference_ops::DepthwiseConv(
- params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
- getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
+ params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+ getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
}
void DepthwiseConv2D::evalQuantizedS16() const
const int32_t depth_multiplier = _params.depth_multiplier;
const std::vector<double> effective_output_scales =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
- quantizeMultipliers(effective_output_scales);
+ quantizeMultipliers(effective_output_scales);
BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
{
const int16_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
const int16_t filter_val =
- filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
}
}
int32_t output_multiplier = quant_multipliers[out_c].multiplier;
int output_shift = quant_multipliers[out_c].shift;
int32_t scaled_acc =
- tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
scaled_acc = std::max(scaled_acc, activation_min);
scaled_acc = std::min(scaled_acc, activation_max);
Shape filter_shape{1, 2, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
kernel.execute();
std::vector<float> ref_output_data{
- 71, 0, 99, 0, //
- 167, 0, 227, 28, //
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
TEST(DepthwiseConv2DTest, Uint8)
{
std::vector<float> input_data{
- 1, 2, 7, 8, // column 1
- 3, 4, 9, 10, // column 2
- 5, 6, 11, 12, // column 3
+ 1, 2, 7, 8, // column 1
+ 3, 4, 9, 10, // column 2
+ 5, 6, 11, 12, // column 3
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor filter_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first,
input_quant_param.second, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S32>(
- {4}, input_quant_param.first * input_quant_param.first, 0, bias_data);
+ {4}, input_quant_param.first * input_quant_param.first, 0, bias_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
kernel.execute();
std::vector<float> ref_output_data{
- 71, -34, 99, -20, //
- 91, -26, 127, -4, //
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
};
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
std::vector<int32_t> ref_output_shape{1, 2, 1, 4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
std::vector<float> ref_output_data{
- 71, 0, 99, 0, //
- 167, 0, 227, 28, //
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
};
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data);
std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
std::vector<float> ref_output_data{
- 71, 0, 99, 0, //
- 167, 0, 227, 28, //
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
};
float input_scale = 0.25;
std::vector<int32_t> zerop(4, 0);
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
Tensor filter_tensor =
- makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3, filter_data);
+ makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
std::vector<float> ref_output_data{
- 71, -34, 99, -20, //
- 91, -26, 127, -4, //
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
};
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16);
Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
input_quant_param.second, input_data);
Tensor filter_tensor =
- makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 3, filter_data);
+ makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 3, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
Shape filter_shape{1, 2, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<int32_t> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{1, 2, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{2, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{2, 1, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Shape filter_shape{1, 2, 4, 2};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
{
Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params)
- : KernelWithParams<DivParams>({input1, input2}, {output}, params)
+ : KernelWithParams<DivParams>({input1, input2}, {output}, params)
{
}
params.float_activation_min = activation_min;
params.float_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), &params);
+ getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
tflite::reference_ops::BroadcastDivSlow(
- params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
- getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
params.quantized_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), &params);
+ getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
tflite::reference_ops::BroadcastDivSlow(
- params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
- getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
else
{
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f);
Tensor input1_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input1_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input1_data);
Tensor input2_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input2_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input2_data);
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
DivParams params{};
params.activation = Activation::RELU;
TEST(EluTest, SimpleElu)
{
Check(
- /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
- /*input_data=*/
- {
- 0, -6, 2, -4, //
- 3, -2, 10, -0.1, //
- },
- /*output_data=*/
- {
- 0.0, -0.997521, 2.0, -0.981684, //
- 3.0, -0.864665, 10.0, -0.0951626, //
- });
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ },
+ /*output_data=*/
+ {
+ 0.0, -0.997521, 2.0, -0.981684, //
+ 3.0, -0.864665, 10.0, -0.0951626, //
+ });
}
TEST(EluTest, InOutTypeMismatch_NEG)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, -6, 2, -4, //
- 3, -2, 10, -0.1, //
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8);
TEST(EqualTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, true, false, // Row 1
- false, true, false, // Row 2
+ false, true, false, // Row 1
+ false, true, false, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(EqualTest, FloatBroardcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
- 0.9, 0.7, 0.5, // Row 4
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ 0.9, 0.7, 0.5, // Row 4
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- false, true, false, // Row 1
- false, false, false, // Row 2
- false, false, false, // Row 3
- true, true, true, // Row 4
+ false, true, false, // Row 1
+ false, false, false, // Row 2
+ false, false, false, // Row 3
+ true, true, true, // Row 4
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data);
TEST(EqualTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.5, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.5, 0.55, 0.5, // Row 1
- -1, 0, 0.05, 1, // Row 2
+ 0.9, 0.5, 0.55, 0.5, // Row 1
+ -1, 0, 0.05, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, true, false, false, // Row 1
- false, true, true, false, // Row 2
+ false, true, false, false, // Row 1
+ false, true, true, false, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
TEST(EqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
- -1, 0.05, 0, 1, // Row 4
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ -1, 0.05, 0, 1, // Row 4
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- false, false, false, false, // Row 1
- false, false, true, false, // Row 2
- false, false, false, false, // Row 3
- true, true, true, true, // Row 4
+ false, false, false, false, // Row 1
+ false, false, true, false, // Row 2
+ false, false, false, false, // Row 3
+ true, true, true, true, // Row 4
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Equal kernel(&x_tensor, &y_tensor, &output_tensor);
{
std::initializer_list<int32_t> input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0.2, 8.6, 2.4, 4.3, // Row 1
- 3, 7.1, 10.5, -0.9, // Row 2
+ 0.2, 8.6, 2.4, 4.3, // Row 1
+ 3, 7.1, 10.5, -0.9, // Row 2
};
std::initializer_list<int32_t> ref_output_shape{1, 2, 4, 1};
std::vector<float> ref_output_data{
- 0, 8, 2, 4, // Row 1
- 3, 7, 10, -1, // Row 2
+ 0, 8, 2, 4, // Row 1
+ 3, 7, 10, -1, // Row 2
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
{
FloorDiv::FloorDiv(const Tensor *input, const Tensor *alpha, Tensor *output)
- : Kernel({input, alpha}, {output})
+ : Kernel({input, alpha}, {output})
{
}
if (x()->shape() != y()->shape())
{
tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
- getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
- getTensorData<float>(output()), FloorDivFunc);
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ getTensorData<float>(output()), FloorDivFunc);
}
else
{
tflite::reference_ops::BinaryFunction<float, float, float>(
- getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
- getTensorData<float>(output()), FloorDivFunc);
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ getTensorData<float>(output()), FloorDivFunc);
}
}
{
Shape x_shape{2, 3};
std::vector<float> x_data{
- 0.5, 2.4, 3.1, // Row 1
- 1.9, -1.9, -2.8, // Row 2
+ 0.5, 2.4, 3.1, // Row 1
+ 1.9, -1.9, -2.8, // Row 2
};
Shape y_shape = x_shape;
std::vector<float> y_data{
- 2.0, 0.5, 3.0, // Row 1
- 1.0, -1.0, -2.0, // Row 2
+ 2.0, 0.5, 3.0, // Row 1
+ 1.0, -1.0, -2.0, // Row 2
};
std::vector<int32_t> ref_output_shape{2, 3};
std::vector<float> ref_output_data{
- 0, 4, 1, // Row 1
- 1, 1, 1, // Row 2
+ 0, 4, 1, // Row 1
+ 1, 1, 1, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data);
{
Shape x_shape{1, 3};
std::vector<float> x_data{
- 0.5, 2.4, -3.1, // Row 1
+ 0.5, 2.4, -3.1, // Row 1
};
Shape y_shape{3, 3};
std::vector<float> y_data{
- 1.0, 1.0, 1.0, // Row 1
- 2.0, -0.5, -2.0, // Row 2
- 0.3, 0.7, 0.9, // Row 3
+ 1.0, 1.0, 1.0, // Row 1
+ 2.0, -0.5, -2.0, // Row 2
+ 0.3, 0.7, 0.9, // Row 3
};
std::vector<int32_t> ref_output_shape{3, 3};
std::vector<float> ref_output_data{
- 0, 2, -4, // Row 1
- 0, -5, 1, // Row 2
- 1, 3, -4, // Row 3
+ 0, 2, -4, // Row 1
+ 0, -5, 1, // Row 2
+ 1, 3, -4, // Row 3
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data);
FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias,
Tensor *output, const FullyConnectedParams &params)
- : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
+ : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
{
}
params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
tflite::reference_ops::FullyConnected(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
- getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
+ getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
}
void FullyConnected::evalQuantized() const
int32_t output_activation_max;
int32_t output_multiplier;
real_multiplier =
- getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+ getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
&output_activation_max);
op_params.lhs_cacheable = false;
op_params.rhs_cacheable = false;
tflite::reference_ops::FullyConnected(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(weights()), getTensorData<uint8_t>(weights()), getTensorShape(bias()),
- getTensorData<int32_t>(bias()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(weights()),
+ getTensorData<uint8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
}
} // namespace kernels
template <>
void Check<uint8_t>(
- std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
- std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
- std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
- std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+ std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+ std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
{
const float quantized_tolerance = getTolerance(-127, 128, 255);
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
Tensor weights_tensor = makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first,
input_quant_param.second, weights_data);
Tensor bias_tensor = makeInputTensor<DataType::S32>(
- bias_shape, input_quant_param.first * input_quant_param.first, 0, bias_data);
+ bias_shape, input_quant_param.first * input_quant_param.first, 0, bias_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
FullyConnectedParams params{};
params.activation = Activation::RELU;
{
Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
},
{
- -3, -7, 4, -4, -6, 4, // unit = 0
- 3, 5, 2, 3, -3, -8, // unit = 1
- -3, 7, 4, 9, 0, -5, // unit = 2
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
},
- {-1, -5, -8}, {
- 0, 0, 32, // batch = 0
- 22, 11, 47, // batch = 1
- });
+ {-1, -5, -8},
+ {
+ 0, 0, 32, // batch = 0
+ 22, 11, 47, // batch = 1
+ });
}
TEST(FullyConnectedTest, InvalidBiasType_NEG)
{
Shape input_shape{3, 2, 2, 1};
std::vector<float> input_data{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
};
Shape weights_shape{3, 6};
std::vector<float> weights_data{
- -3, -7, 4, -4, -6, 4, // unit = 0
- 3, 5, 2, 3, -3, -8, // unit = 1
- -3, 7, 4, 9, 0, -5, // unit = 2
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
};
Shape bias_shape{3};
std::vector<int32_t> bias_data{-1, -5, -8};
{
Shape input_shape{3, 2, 2, 1};
std::vector<float> input_data{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
};
Shape weights_shape{1, 3, 6};
std::vector<float> weights_data{
- -3, -7, 4, -4, -6, 4, // unit = 0
- 3, 5, 2, 3, -3, -8, // unit = 1
- -3, 7, 4, 9, 0, -5, // unit = 2
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
};
Shape bias_shape{3};
std::vector<float> bias_data{-1, -5, -8};
{
Shape input_shape{3, 2, 2, 1};
std::vector<float> input_data{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
};
Shape weights_shape{6, 3};
std::vector<float> weights_data{
- -3, -7, 4, // unit = 0
- -4, -6, 4, // unit = 1
- 3, 5, 2, // unit = 2
- 3, -3, -8, // unit = 3
- -3, 7, 4, // unit = 4
- 9, 0, -5, // unit = 5
+ -3, -7, 4, // unit = 0
+ -4, -6, 4, // unit = 1
+ 3, 5, 2, // unit = 2
+ 3, -3, -8, // unit = 3
+ -3, 7, 4, // unit = 4
+ 9, 0, -5, // unit = 5
};
Shape bias_shape{3};
std::vector<float> bias_data{-1, -5, -8};
TEST(GreaterTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, false, true, // Row 1
- true, false, false, // Row 2
+ false, false, true, // Row 1
+ true, false, false, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(GreaterTest, FloatBroardcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- false, false, true, // Row 1
- true, false, false, // Row 2
- false, false, true, // Row 3
+ false, false, true, // Row 1
+ true, false, false, // Row 2
+ false, false, true, // Row 3
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
TEST(GreaterTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.6, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, false, true, true, // Row 1
- true, false, true, false, // Row 2
+ false, false, true, true, // Row 1
+ true, false, true, false, // Row 2
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(GreaterTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.6, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, false, true, true, // Row 1
- true, false, true, false, // Row 2
+ false, false, true, true, // Row 1
+ true, false, true, false, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(GreaterTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- true, false, true, false, // Row 1
- true, true, false, false, // Row 2
- true, false, true, false, // Row 3
+ true, false, true, false, // Row 1
+ true, true, false, false, // Row 2
+ true, false, true, false, // Row 3
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
{
GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
- : Kernel({x, y}, {output})
+ : Kernel({x, y}, {output})
{
}
if (op_params.is_broadcast)
{
tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
- op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
}
else
{
TEST(GreaterEqualTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, true, true, // Row 1
- true, true, false, // Row 2
+ false, true, true, // Row 1
+ true, true, false, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(GreaterEqualTest, FloatBroardcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- false, true, true, // Row 1
- true, false, false, // Row 2
- false, false, true, // Row 3
+ false, true, true, // Row 1
+ true, false, false, // Row 2
+ false, false, true, // Row 3
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
TEST(GreaterEqualTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.55, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, true, true, true, // Row 1
- true, false, true, false, // Row 2
+ false, true, true, true, // Row 1
+ true, false, true, false, // Row 2
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(GreaterEqualTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
- 0.5, 0.5, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.5, 0.6, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.5, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- false, true, true, true, // Row 1
- true, false, true, false, // Row 2
+ false, true, true, true, // Row 1
+ true, false, true, false, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(GreaterEqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- true, false, true, false, // Row 1
- true, true, true, false, // Row 2
- true, false, true, false, // Row 3
+ true, false, true, false, // Row 1
+ true, true, true, false, // Row 2
+ true, false, true, false, // Row 3
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
RuntimeGraph *then_graph, RuntimeGraph *else_graph)
- : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
- _else_graph(else_graph)
+ : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
+ _else_graph(else_graph)
{
}
{
RuntimeGraph *graph = module->addGraph();
Tensor *input1 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *input2 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *output = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
graph->setInputTensors({input1, input2});
graph->setOutputTensors({output});
{
RuntimeGraph *graph = module->addGraph();
Tensor *input1 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *input2 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *output = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
graph->setInputTensors({input1, input2});
graph->setOutputTensors({output});
InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
Tensor *output, const InstanceNormParams &params)
- : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
+ : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
{
}
for (int32_t width = 0; width < widths; width++)
{
double input_value =
- input_data[tflite::Offset(output_shape, batch, height, width, channel)];
+ input_data[tflite::Offset(output_shape, batch, height, width, channel)];
double output_value = input_value * a + b;
output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
- tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
- activation_max);
+ tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
+ activation_max);
}
}
}
{
L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
- : KernelWithParams<L2NormParams>({input}, {output}, params)
+ : KernelWithParams<L2NormParams>({input}, {output}, params)
{
}
std::initializer_list<float> output_data)
{
std::pair<float, int32_t> quant_param =
- quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
- std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128);
L2NormParams params{};
{
L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
{
}
int out_width, out_height;
out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1);
out_height =
- computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
+ computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
_padding_width =
- computePadding(params().stride_width, 1, width, params().filter_width, out_width);
+ computePadding(params().stride_width, 1, width, params().filter_width, out_width);
_padding_height =
- computePadding(params().stride_height, 1, height, params().filter_height, out_height);
+ computePadding(params().stride_height, 1, height, params().filter_height, out_height);
LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
output()->resize({batches, out_height, out_width, channels_out});
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- -1, -6, 2, 4, //
- -3, -2, 10, 7, //
+ -1, -6, 2, 4, //
+ -3, -2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- -0.1, -0.6, 2, 4, //
- -0.3, -0.2, 10, 7, //
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- -0.1, -0.6, 2, 4, //
- -0.3, -0.2, 10, 7, //
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Shape input_shape{1, 2, 4};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8);
{
LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
- : KernelWithParams<LeakyReluParams>({input}, {output}, params)
+ : KernelWithParams<LeakyReluParams>({input}, {output}, params)
{
}
op_params.output_shift_identity = _output_shift_identity;
tflite::reference_ops::QuantizeLeakyRelu(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
}
} // namespace kernels
const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f);
Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
LeakyReluParams params{};
Check<TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3},
/*input_data=*/
{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -1.0f, -2.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
},
/*output_data=*/
{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -0.5f, -1.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -1.0f, // Row 2
},
/*alpha=*/0.5f);
TEST(LeakReluTest, IvalidInputOutputType_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, {
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -1.0f, -2.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
});
Tensor output_tensor = makeOutputTensor(DataType::U8);
TEST(LessTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, false, false, // Row 1
- false, false, true, // Row 2
+ true, false, false, // Row 1
+ false, false, true, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(LessTest, FloatBroardcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- true, false, false, // Row 1
- false, true, true, // Row 2
- true, true, false, // Row 3
+ true, false, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
TEST(LessTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.55, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, false, false, false, // Row 1
- false, true, false, true, // Row 2
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(LessTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.6, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, false, false, false, // Row 1
- false, true, false, true, // Row 2
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(LessTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- false, true, false, true, // Row 1
- false, false, false, true, // Row 2
- false, true, false, true, // Row 3
+ false, true, false, true, // Row 1
+ false, false, false, true, // Row 2
+ false, true, false, true, // Row 3
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
if (op_params.is_broadcast)
{
tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling(
- op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
}
else
{
TEST(LessEqualTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, true, false, // Row 1
- false, true, true, // Row 2
+ true, true, false, // Row 1
+ false, true, true, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(LessEqualTest, FloatBroardcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- true, true, false, // Row 1
- false, true, true, // Row 2
- true, true, false, // Row 3
+ true, true, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
TEST(LessEqualTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.55, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, true, false, false, // Row 1
- false, true, false, true, // Row 2
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(LessEqualTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
- 0.5, 0.6, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.6, 0.6, 0.5, // Row 1
- -1, 0.05, 0, 1, // Row 2
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, true, false, false, // Row 1
- false, true, false, true, // Row 2
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
TEST(LessEqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- false, true, false, true, // Row 1
- false, false, true, true, // Row 2
- false, true, false, true, // Row 3
+ false, true, false, true, // Row 1
+ false, false, true, true, // Row 2
+ false, true, false, true, // Row 3
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
{
LocalResponseNormalization::LocalResponseNormalization(
- const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
- : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
+ const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
+ : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
{
}
op_params.alpha = params().alpha;
op_params.beta = params().beta;
tflite::optimized_ops::LocalResponseNormalization(
- op_params, getTensorShape(input()), getTensorData<float>(input()),
- getTensorShape(output()), getTensorData<float>(output()));
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
TEST(LocalResponseNormalizationTest, SameAsL2Norm)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
TEST(LocalResponseNormalizationTest, WithAlpha)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
TEST(LocalResponseNormalizationTest, WithBias)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
TEST(LocalResponseNormalizationTest, SmallRadius)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
TEST(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
TEST(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
{
Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
Tensor output_tensor = makeOutputTensor(DataType::U8);
LocalResponseNormalizationParams params{};
{
Shape input_shape{2, 4};
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
kernel.execute();
std::vector<float> ref_output_data{
- -4.14297, -10.14297, -2.14297, -.142971, //
- -7.00104, -12.00104, -.00104087, -9.00104, //
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
float kLogSoftmaxQuantizedTolerance = 16. / 256;
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
Tensor input_tensor =
- makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
LogSoftmax kernel(&input_tensor, &output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
- -4.14297, -10.14297, -2.14297, -.142971, //
- -7.00104, -12.00104, -.00104087, -9.00104, //
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
};
std::vector<int32_t> ref_output_shape{2, 4};
EXPECT_THAT(dequantizeTensorData(output_tensor),
TEST(LogSoftmaxTest, InvalidInputOutputType_NEG)
{
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 4}, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
{
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10);
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
Tensor input_tensor =
- makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, 20. / 256, 255);
LogSoftmax kernel(&input_tensor, &output_tensor);
{
LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
+ : Kernel({input1, input2}, {output})
{
}
{
LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
+ : Kernel({input1, input2}, {output})
{
}
std::initializer_list<float> output_data)
{
std::pair<float, int32_t> input_quant_param =
- quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
input_quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
TYPED_TEST(LogisticTest, Simple)
{
Check<TypeParam>(
- {89}, {89},
- {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
- -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
- -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
- -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
- -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
- -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
- -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
- -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818,
- 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455,
- 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091,
- 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727,
- 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364,
- 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000,
- 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636,
- 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000},
- {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
- 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
- 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
- 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
- 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
- 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
- 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
- 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
- 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
- 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
- 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
- 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
- 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
- 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
- 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
+ {89}, {89},
+ {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
+ -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
+ -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
+ -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
+ -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
+ -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
+ -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
+ -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818,
+ 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455,
+ 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091,
+ 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727,
+ 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364,
+ 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000,
+ 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636,
+ 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000},
+ {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
+ 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
+ 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
+ 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
+ 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
+ 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
+ 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
+ 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
+ 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
+ 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
+ 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
+ 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
+ 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
+ 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
+ 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
}
TEST(LogisticTest, InvalidInputOutputType_NEG)
{
MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
{
}
const int32_t input_width = input_shape.dim(2);
const int32_t depth = input_shape.dim(3);
- const int32_t output_height = computeOutputSize(_params.padding, input_height,
- _params.filter_height, _params.stride_height);
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
_padding_height =
- computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
_padding_width =
- computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
output()->resize({batches, output_height, output_width, depth});
if (input()->element_type() == DataType::U8)
params.quantized_activation_max = activation_max;
tflite::reference_integer_ops::MaxPool(
- params, getTensorShape(input()), getTensorData<int16_t>(input()), //
- getTensorShape(output()), getTensorData<int16_t>(output()));
+ params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+ getTensorShape(output()), getTensorData<int16_t>(output()));
}
} // namespace kernels
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
- 1, -1, 0, -2, 2, //
- -7, -6, -5, -4, -3, //
- 5, 4, 3, 6, 7, //
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
kernel.execute();
std::vector<float> ref_output_data{
- 1, 2, //
- 5, 6, //
+ 1, 2, //
+ 5, 6, //
};
std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
{
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
std::vector<float> input_data{
- 0, -6, 12, 4, //
- -3, -2, 10, 7, //
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Pool2DParams params{};
Shape input_shape{1, 3, 5, 1};
std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
std::vector<float> input_data{
- 1, -1, 0, -2, 2, //
- -7, -6, -5, -4, -3, //
- 5, 4, 3, 6, 7, //
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
};
std::vector<float> ref_output_data{
- 1, 2, //
- 5, 6, //
+ 1, 2, //
+ 5, 6, //
};
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data);
{
Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
+ : Kernel({input1, input2}, {output})
{
}
}
Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params)
- : KernelWithParams<ReducerParams>({input, axes}, {output}, params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output}, params)
{
}
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
- const bool need_temporaries =
- !(_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
- ((params.axis[0] == 1 && params.axis[1] == 2) ||
- (params.axis[0] == 2 && params.axis[1] == 1)));
+ const bool need_temporaries = !(
+ _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
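+  // In effect, the only reduction that avoids the scratch tensors is a keep_dims
+  // mean of a 4D (e.g. NHWC) tensor over axes {1, 2}; every other combination of
+  // axes goes through the generic reference implementation below and needs the
+  // temporary index / resolved-axes / sum tensors allocated here.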
if (need_temporaries)
{
_temp_index =
- std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, "");
+ std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, "");
_resolved_axes =
- std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, "");
+ std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, "");
_temp_sum = std::make_unique<Tensor>(input()->element_type(), output()->shape(),
AffineQuantization{}, "");
}
else
{
tflite::reference_ops::Mean(
- getTensorData<float>(input()), getTensorShape(input()).DimsData(),
- input()->shape().num_dims(), getTensorData<float>(output()),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get()));
+ getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<float>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(_temp_index.get()),
+ getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get()));
}
}
else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
{
tflite::reference_ops::Mean(
- getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
- input()->shape().num_dims(), getTensorData<uint8_t>(output()),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()));
+ getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(_temp_index.get()),
+ getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()));
}
else
{
tflite::reference_ops::QuantizedMeanOrSum<>(
- getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
- getTensorShape(input()).DimsData(), input()->shape().num_dims(),
- getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()),
- /*compute_sum=*/false);
+ getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
+ getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(_temp_index.get()),
+ getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()),
+ /*compute_sum=*/false);
}
}
assert(output_shape.dim(3) == depth);
const double real_multiplier =
- static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
+ static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
int32_t output_multiplier{};
int output_shift{};
}
}
int32_t scaled_acc =
- tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
// Divide by the number of elements rounding to the nearest integer.
scaled_acc = scaled_acc > 0
- ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
- : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
+ ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
+ : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
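+        // For example, with num_elements_in_axes = 4 a positive scaled_acc of 7
+        // becomes (7 + 2) / 4 = 2 (1.75 rounds up instead of truncating to 1),
+        // and a negative scaled_acc of -7 becomes (-7 - 2) / 4 = -2, keeping the
+        // rounding symmetric around zero.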
scaled_acc = std::max(scaled_acc, output_min);
scaled_acc = std::min(scaled_acc, output_max);
std::vector<int32_t> axis_data{1};
Tensor input_tensor =
- makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, input_data);
Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
std::vector<int32_t> axis_data{1};
Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 3, 2}, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>({1, 3, 2}, quant_param.first, quant_param.second, input_data);
Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
{
Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
+ : Kernel({input1, input2}, {output})
{
}
{
Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
- : KernelWithParams<MulParams>({input1, input2}, {output}, params)
+ : KernelWithParams<MulParams>({input1, input2}, {output}, params)
{
}
params.float_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-      getTensorShape(input1()), getTensorShape(input2()), &params);
+    getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
tflite::optimized_ops::BroadcastMul4DSlow(
- params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
- getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::vector<std::vector<float>> test_outputs = {
- {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
- 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
- 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
- {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
- {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
- 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
- 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
- {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
- << "With shape number " << i;
+ << "With shape number " << i;
}
// Re-run with exchanged inputs.
for (size_t i = 0; i < test_shapes.size(); ++i)
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
- << "With shape number " << i;
+ << "With shape number " << i;
}
}
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::vector<std::vector<int32_t>> ref_output_shapes{
- {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
std::vector<std::vector<float>> ref_outputs = {
- {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
- 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
- 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
- {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
- {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
- 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
- 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
- {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
for (size_t i = 0; i < test_shapes.size(); ++i)
{
Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data);
Tensor input2_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
+ makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
const float tolerance = output_tensor.scale() * 2;
EXPECT_THAT(extractTensorShape(output_tensor),
::testing::ElementsAreArray(ref_output_shapes[i]))
- << "With shape number " << i;
+ << "With shape number " << i;
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
- << "With shape number " << i;
+ << "With shape number " << i;
}
// Re-run with exchanged inputs and different scales.
for (size_t i = 0; i < test_shapes.size(); ++i)
{
Tensor input1_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
+ makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0);
const float tolerance = output_tensor.scale() * 2;
EXPECT_THAT(extractTensorShape(output_tensor),
::testing::ElementsAreArray(ref_output_shapes[i]))
- << "With shape number " << i;
+ << "With shape number " << i;
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
- << "With shape number " << i;
+ << "With shape number " << i;
}
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Neg::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void Neg::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Neg::evalFloat() const
+{
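+  // Element-wise negation: every output element is the negated value of the
+  // corresponding input element, over identically shaped tensors.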
+ tflite::reference_ops::Negate(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NEG_H
+#define LUCI_INTERPRETER_KERNELS_NEG_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Neg : public Kernel
+{
+public:
+ Neg(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NEG_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<T> output_data)
+{
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ Neg kernel(&input_tensor, &output_tensor);
+
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(NegTest, FloatSimple)
+{
+ Check<float>(/*input_shape=*/{2, 3},
+ /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, -1.0f, -3.0f, // Row 1
+ -1.0f, 1.0f, 2.0f, // Row 2
+ });
+
+ SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
if (op_params.is_broadcast)
{
tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling(
- op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
}
else
{
TEST(NotEqualTest, FloatSimple)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
- -1, 0, 1, // Row 2
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, false, true, // Row 1
- true, false, true, // Row 2
+ true, false, true, // Row 1
+ true, false, true, // Row 2
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
TEST(NotEqualTest, FloatBroadcast)
{
std::vector<float> x_data{
- 0.5, 0.7, 0.9, // Row 1
- 1, 0, -1, // Row 2
- -1, 0, 1, // Row 3
- 0.9, 0.7, 0.5, // Row 4
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ 0.9, 0.7, 0.5, // Row 4
};
std::vector<float> y_data{
- 0.9, 0.7, 0.5, // Row 1
+ 0.9, 0.7, 0.5, // Row 1
};
std::vector<bool> ref_output_data{
- true, false, true, // Row 1
- true, true, true, // Row 2
- true, true, true, // Row 3
- false, false, false, // Row 4
+ true, false, true, // Row 1
+ true, true, true, // Row 2
+ true, true, true, // Row 3
+ false, false, false, // Row 4
};
Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data);
TEST(NotEqualTest, Uint8Quantized)
{
std::vector<float> x_data{
- 0.5, 0.5, 0.7, 0.9, // Row 1
- 1, 0, 0.05, -1, // Row 2
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
};
std::vector<float> y_data{
- 0.9, 0.5, 0.55, 0.5, // Row 1
- -1, 0, 0.05, 1, // Row 2
+ 0.9, 0.5, 0.55, 0.5, // Row 1
+ -1, 0, 0.05, 1, // Row 2
};
std::vector<bool> ref_output_data{
- true, false, true, true, // Row 1
- true, false, false, true, // Row 2
+ true, false, true, true, // Row 1
+ true, false, false, true, // Row 2
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first,
- x_quant_param.second, x_data);
+ Tensor x_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
- Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first,
- y_quant_param.second, y_data);
+ Tensor y_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
TEST(NotEqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
- 0.4, -0.8, 0.7, 0.3, // Row 1
- -0.5, 0.1, 0, 0.5, // Row 2
- 1, 0, 0.05, -1, // Row 3
- -1, 0.05, 0, 1, // Row 4
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ -1, 0.05, 0, 1, // Row 4
};
std::vector<float> y_data{
- -1, 0.05, 0, 1, // Row 1
+ -1, 0.05, 0, 1, // Row 1
};
std::vector<bool> ref_output_data{
- true, true, true, true, // Row 1
- true, true, false, true, // Row 2
- true, true, true, true, // Row 3
- false, false, false, false, // Row 4
+ true, true, true, true, // Row 1
+ true, true, false, true, // Row 2
+ true, true, true, true, // Row 3
+ false, false, false, false, // Row 4
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
+ makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params)
+ : KernelWithParams<PackParams>(std::move(inputs), {output}, params)
+{
+}
+
+void Pack::configure()
+{
+ LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count));
+ const Tensor *t0 = _inputs[0];
+ const int dimension_size = t0->shape().num_dims() + 1;
+ int axis = params().axis;
+ if (axis < 0)
+ {
+ axis += dimension_size;
+ }
+ LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims());
+
+ if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 &&
+ t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 &&
+ t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ for (uint32_t i = 1; i < _inputs.size(); ++i)
+ {
+ const Tensor *tensor = _inputs[i];
+ LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+ LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+ for (int d = 0; d < t0->shape().num_dims(); ++d)
+ {
+ LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+ }
+ }
+
+ Shape output_shape(dimension_size);
+ int i = 0;
+ for (int index = 0; index < dimension_size; ++index)
+ {
+ if (index == axis)
+ {
+ output_shape.dim(index) = params().values_count;
+ }
+ else
+ {
+ output_shape.dim(index) = t0->shape().dim(i++);
+ }
+ }
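+
+  // For example, packing three tensors of shape {2} with axis = 0 yields an output
+  // shape of {3, 2}, while axis = 1 (or the equivalent axis = -1) yields {2, 3}:
+  // the values_count dimension is inserted at the requested axis and the remaining
+  // input dimensions keep their original order.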
+
+ if (t0->element_type() == DataType::S32 || t0->element_type() == DataType::U8 ||
+ t0->element_type() == DataType::S8 || t0->element_type() == DataType::S16 ||
+ t0->element_type() == DataType::S64)
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
+ LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
+    // Guarantee that the quantization params of every input match those of the
+    // output, since packing tensors with differing quantization is not supported.
+ for (int i = 0; i < params().values_count; i++)
+ {
+ LUCI_INTERPRETER_CHECK(_inputs[i]->zero_point() == t0->zero_point());
+ LUCI_INTERPRETER_CHECK(_inputs[i]->scale() == t0->scale());
+ }
+ }
+
+ output()->resize(output_shape);
+}
+
+void Pack::execute() const
+{
+ switch (_inputs[0]->element_type())
+ {
+ case DataType::FLOAT32:
+ evalGeneric<float>();
+ break;
+ case DataType::U8:
+ evalGeneric<uint8_t>();
+ break;
+ case DataType::S8:
+ evalGeneric<int8_t>();
+ break;
+ case DataType::S16:
+ evalGeneric<int16_t>();
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>();
+ break;
+ case DataType::S64:
+ evalGeneric<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void Pack::evalGeneric() const
+{
+ const Tensor *t0 = _inputs[0];
+ const int dimension_size = t0->shape().num_dims() + 1;
+ int axis = params().axis;
+ if (axis < 0)
+ {
+ axis += dimension_size;
+ }
+
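+  // VectorOfTensors collects the shapes and raw data pointers of all inputs so
+  // they can be handed to the TFLite reference Pack call below in one go.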
+ VectorOfTensors<T, true> inputs(_inputs);
+ tflite::PackParams params{};
+ params.axis = axis;
+ params.inputs_count = _inputs.size();
+ tflite::reference_ops::Pack<T>(params, inputs.shapes(), inputs.data(), getTensorShape(output()),
+ getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PACK_H
+#define LUCI_INTERPRETER_KERNELS_PACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Pack : public KernelWithParams<PackParams>
+{
+public:
+  Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params);
+
+ const Tensor *input(int index) const { return _inputs[index]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalGeneric() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PACK_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
+ std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas,
+ std::initializer_list<T> output_data, int32_t axis)
+{
+ constexpr DataType element_type = getElementType<T>();
+ std::vector<const Tensor *> inputs(input_datas.size());
+ std::vector<Tensor> tmp_inputs;
+ for (int i = 0; i < input_datas.size(); i++)
+ {
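+    // Float inputs carry no quantization params; for the quantized (uint8_t) type
+    // parameter every input uses a fixed scale of 1/255 and zero point of 128,
+    // matching the output tensor created below, since Pack requires identical
+    // quantization params across all inputs and the output.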
+ if (std::is_same<T, float>::value)
+ {
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, ""));
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
+ else
+ {
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, ""));
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
+ }
+ for (int i = 0; i < input_datas.size(); i++)
+ {
+ inputs[i] = &tmp_inputs[i];
+ }
+
+ Tensor output_tensor = makeOutputTensor(element_type);
+ if (!std::is_same<T, float>::value)
+ {
+ output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128);
+ }
+
+ PackParams params{};
+ params.axis = axis;
+ params.values_count = input_datas.size();
+ Pack kernel(inputs, &output_tensor, params);
+
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class PackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<uint8_t, float>;
+TYPED_TEST_CASE(PackTest, DataTypes);
+
+TYPED_TEST(PackTest, ThreeInputs)
+{
+ Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+ /*output_shape=*/{3, 2},
+ /*input_datas=*/
+ {{1, 4}, {2, 5}, {3, 6}},
+ /*output_data=*/
+ {1, 4, 2, 5, 3, 6}, /*axis=*/0);
+
+ SUCCEED();
+}
+
+TYPED_TEST(PackTest, NegAxis)
+{
+ Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+ /*output_shape=*/{2, 3},
+ /*input_datas=*/
+ {{1, 4}, {2, 5}, {3, 6}},
+ /*output_data=*/
+ {1, 2, 3, 4, 5, 6}, /*axis=*/-1);
+
+ SUCCEED();
+}
+
+TEST(Pack, MismatchingInputValuesCount_NEG)
+{
+ std::vector<float> input1_data{1, 4};
+ std::vector<float> input2_data{2, 5};
+ std::vector<float> input3_data{3, 6};
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data);
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data);
+ Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ PackParams params{};
+ {
+ params.axis = 0;
+ params.values_count = 2;
+
+ Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+ }
+}
+
+TEST(Pack, InvalidInputAxis_NEG)
+{
+ std::vector<float> input1_data{1, 4};
+ std::vector<float> input2_data{2, 5};
+ std::vector<float> input3_data{3, 6};
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data);
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data);
+ Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ PackParams params{};
+ {
+ params.axis = 2;
+ params.values_count = 3;
+
+ Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+ }
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
{
Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
- : Kernel({input, paddings}, {output})
+ : Kernel({input, paddings}, {output})
{
}
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data);
Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
{
Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
+ : Kernel({input1, input2}, {output})
{
}
tflite::ArithmeticParams params{};
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-      getTensorShape(input1()), getTensorShape(input2()), &params);
+    getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
{
Prelu::Prelu(const Tensor *input, const Tensor *alpha, Tensor *output)
- : Kernel({input, alpha}, {output})
+ : Kernel({input, alpha}, {output})
{
}
+Prelu::~Prelu()
+{
+  // Out-of-line destructor: ChannelQuantMultipliers is only forward-declared in the
+  // header, so the vector of alpha multipliers must be destroyed here, where the
+  // type is complete.
+}
+
void Prelu::configure()
{
LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
+ LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);
- if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ if (input()->element_type() == DataType::U8)
{
- if (input()->element_type() == DataType::S16)
+ LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives
+ _alpha_multipliers.resize(1);
+ double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
+ quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier,
+ &_alpha_multipliers[0].shift);
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ // Common check for correctness of quant params
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
{
- LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && alpha()->zero_point() == 0 &&
- output()->zero_point() == 0);
+ LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
}
- double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
- quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+    // Prelu-specific checks for CWQ (channel-wise quantized) alpha
+ LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
+ LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
+ alpha()->shape().dim(alpha()->quantized_dimension()));
+ LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
+ input()->shape().dim(input()->shape().num_dims() - 1));
+
+    // All dimensions of alpha except the last one must be of size 1
+ for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
+ }
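+
+    // For example, with an input of shape {1, H, W, C} a valid alpha has shape
+    // {1, 1, C} (or simply {C}), is quantized along its last dimension, and carries
+    // exactly C scales, all with zero points of 0.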
+
+ std::vector<double> real_multipliers =
+ getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());
+
+ _alpha_multipliers = quantizeMultipliers(real_multipliers);
+
double identity_multiplier = input()->scale() / output()->scale();
quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
}
if (input()->shape() != alpha()->shape())
{
tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
- getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
- getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
- PreluFunc);
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
+ getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
+ PreluFunc);
}
else
{
op_params.output_offset = output()->zero_point();
op_params.output_shift_1 = _output_shift_identity;
op_params.output_multiplier_1 = _output_multiplier_identity;
- op_params.output_shift_2 = _output_shift_alpha;
- op_params.output_multiplier_2 = _output_multiplier_alpha;
+ op_params.output_shift_2 = _alpha_multipliers[0].shift;
+ op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;
if (input()->shape() != alpha()->shape())
{
tflite::reference_ops::BroadcastPrelu4DSlow(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(alpha()), getTensorData<uint8_t>(alpha()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+ getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
else
{
- tflite::reference_ops::Prelu<uint8_t>(op_params, getTensorShape(input()),
- getTensorData<uint8_t>(input()), getTensorShape(alpha()),
- getTensorData<uint8_t>(alpha()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ tflite::reference_ops::Prelu<uint8_t>(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+ getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
}
-void Prelu::evalQuantizedS16() const
+static inline int16_t evalElemS16Prelu(int16_t input_val, int16_t alpha_val,
+ const ChannelQuantMultipliers &identity_mult,
+ const ChannelQuantMultipliers &alpha_mult)
{
constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
- auto fn = [this, quantized_min, quantized_max](int16_t input_val, int16_t alpha_val) {
- const int32_t output_val =
- input_val >= 0
- ? tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier_identity,
- _output_shift_identity)
- : tflite::MultiplyByQuantizedMultiplier(input_val * alpha_val, _output_multiplier_alpha,
- _output_shift_alpha);
- const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
- return static_cast<int16_t>(clamped_output);
- };
-
- BinaryOpBroadcastSlow(getTensorShape(input()), getTensorData<int16_t>(input()),
- getTensorShape(alpha()), getTensorData<int16_t>(alpha()),
- getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+ const int32_t output_val =
+ input_val >= 0 ? tflite::MultiplyByQuantizedMultiplier(input_val, identity_mult.multiplier,
+ identity_mult.shift)
+ : tflite::MultiplyByQuantizedMultiplier(input_val * alpha_val,
+ alpha_mult.multiplier, alpha_mult.shift);
+ const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
+  return static_cast<int16_t>(clamped_output);
+}
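+
+// Taken together, the helper above yields real_output ~= real_input for non-negative
+// inputs and real_output ~= real_input * real_alpha_c for negative ones: the identity
+// multiplier folds s_in / s_out and each per-channel alpha multiplier folds
+// s_in * s_alpha_c / s_out, mirroring the U8 path set up in configure().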
+
+void Prelu::evalQuantizedS16() const
+{
+ // Note that this kernel assumes alpha is CWQ
+ tflite::RuntimeShape input_shape = getTensorShape(input());
+ const int16_t *input_data = input()->data<int16_t>();
+ const int16_t *alpha_data = alpha()->data<int16_t>();
+ int16_t *output_data = output()->data<int16_t>();
+
+ const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};
+
+ const int last_dim = input()->shape().num_dims() - 1;
+
+ int32_t outer_dims_size = 1;
+ for (int i = 0; i < last_dim; ++i)
+ outer_dims_size *= input_shape.Dims(i);
+ int32_t quant_dim_size = input_shape.Dims(last_dim);
+
+ for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims)
+ for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel)
+ {
+ const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel];
+ size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size);
+ offset += quant_channel;
+
+ output_data[offset] =
+ evalElemS16Prelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult);
+ }
}
} // namespace kernels
#define LUCI_INTERPRETER_KERNELS_PRELU_H
#include "core/Kernel.h"
+#include <vector>
namespace luci_interpreter
{
namespace kernels
{
+class ChannelQuantMultipliers;
+
class Prelu : public Kernel
{
public:
Prelu(const Tensor *input, const Tensor *alpha, Tensor *output);
+ ~Prelu();
+
const Tensor *input() const { return _inputs[0]; }
const Tensor *alpha() const { return _inputs[1]; }
Tensor *output() const { return _outputs[0]; }
void evalQuantizedS16() const;
private:
- int32_t _output_multiplier_alpha = 0;
- int32_t _output_shift_alpha = 0;
+ std::vector<ChannelQuantMultipliers> _alpha_multipliers;
+  // TODO Merge these two fields into a single ChannelQuantMultipliers object
int32_t _output_multiplier_identity = 0;
int32_t _output_shift_identity = 0;
};
/*output_shape=*/{2, 3},
/*input_data=*/
{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -1.0f, -2.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
},
/*alpha_data=*/
{
- 0.0f, 0.5f, 0.1f, // Row 1
- 0.0f, 0.5f, 0.1f, // Row 2
+ 0.0f, 0.5f, 0.1f, // Row 1
+ 0.0f, 0.5f, 0.1f, // Row 2
},
/*output_data=*/
{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -0.5f, -0.2f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -0.2f, // Row 2
});
SUCCEED();
/*output_shape=*/{1, 2, 2, 3},
/*input_data=*/
{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 1.0f, 1.0f, 1.0f, // Row 1, Column 2
- -1.0f, -1.0f, -1.0f, // Row 2, Column 1
- -2.0f, -2.0f, -2.0f, // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 1.0f, 1.0f, 1.0f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -2.0f, -2.0f, -2.0f, // Row 2, Column 2
},
/*alpha_data=*/
{0.0f, 1.0f, 2.0f},
/*output_data=*/
{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 1.0f, 1.0f, 1.0f, // Row 1, Column 2
- 0.0f, -1.0f, -2.0f, // Row 2, Column 1
- 0.0f, -2.0f, -4.0f, // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 1.0f, 1.0f, 1.0f, // Row 1, Column 2
+ 0.0f, -1.0f, -2.0f, // Row 2, Column 1
+ 0.0f, -2.0f, -4.0f, // Row 2, Column 2
});
SUCCEED();
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first,
- quant_param.second, input_data);
- Tensor alpha_tensor = makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first,
- quant_param.second, alpha_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data);
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
TEST(PreluTest, Uint8Broadcast)
{
std::vector<float> input_data{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 0.5f, 0.5f, 0.5f, // Row 1, Column 2
- -1.0f, -1.0f, -1.0f, // Row 2, Column 1
- -0.25f, -0.25f, -0.25f, // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -0.25f, -0.25f, -0.25f, // Row 2, Column 2
};
std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
std::vector<float> ref_output_data{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 0.5f, 0.5f, 0.5f, // Row 1, Column 2
- 0.0f, -0.5f, 0.5f, // Row 2, Column 1
- 0.0f, -0.125f, 0.125f // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ 0.0f, -0.5f, 0.5f, // Row 2, Column 1
+ 0.0f, -0.125f, 0.125f // Row 2, Column 2
};
std::vector<float> ref_quant_output_data{
- 128, 128, 128, // Row 1, Column 1
- 192, 192, 192, // Row 1, Column 2
- 128, 64, 192, // Row 2, Column 1
- 128, 112, 144 // Row 2, Column 2
+ 128, 128, 128, // Row 1, Column 1
+ 192, 192, 192, // Row 1, Column 2
+ 128, 64, 192, // Row 2, Column 1
+ 128, 112, 144 // Row 2, Column 2
};
float kQuantizedTolerance = 2 * (1. / 256);
const float kMin = -1;
const float kMax = 127.f / 128.f;
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 3}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 2, 3}, quant_param.first, quant_param.second, input_data);
Tensor alpha_tensor =
- makeInputTensor<DataType::U8>({1, 1, 3}, quant_param.first, quant_param.second, alpha_data);
+ makeInputTensor<DataType::U8>({1, 1, 3}, quant_param.first, quant_param.second, alpha_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
::testing::ElementsAreArray(ref_quant_output_data));
}
-TEST(PreluTest, SInt16Simple)
+TEST(PreluTest, SInt16_LWQ_NEG)
{
- std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f};
- std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f};
- std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f};
+  // Rewrite this test once layer-wise quantization for sint16 is supported
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data);
Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PreluTest, SInt16_CWQ_Simple)
+{
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
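+  // With shape {1, 1, 3, 2} the channels alternate, so channel 0 uses alpha 0.5 and channel 1
+  // uses alpha 0.25, e.g. -0.8 * 0.5 = -0.4 and -0.7 * 0.25 = -0.175.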
+
+ std::vector<float> alpha_scales{0.05f, 0.025f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data);
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
+
Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
kernel.configure();
kernel.execute();
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PreluTest, SInt16_CWQ_spatial_alpha_NEG)
+{
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ std::vector<float> alpha_scales{0.25f, 0.05f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3, alpha_data);
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PreluTest, SInt16_CWQ_wrong_dim_quant_NEG)
+{
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ std::vector<float> alpha_scales{0.25f};
+ std::vector<int32_t> zerop{0};
+ Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1, alpha_data);
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PreluTest, SInt16_CWQ_uneven_shape1)
+{
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
+
+ std::vector<float> alpha_scales{0.05f, 0.025f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2, alpha_data);
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(PreluTest, SInt16Broadcast)
+TEST(PreluTest, SInt16_CWQ_uneven_shape2)
{
std::vector<float> input_data{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 0.5f, 0.5f, 0.5f, // Row 1, Column 2
- -1.0f, -1.0f, -1.0f, // Row 2, Column 1
- -0.25f, -0.25f, -0.25f, // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -0.25f, -0.25f, -0.25f, // Row 2, Column 2
};
std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
std::vector<float> ref_output_data{
- 0.0f, 0.0f, 0.0f, // Row 1, Column 1
- 0.5f, 0.5f, 0.5f, // Row 1, Column 2
- 0.0f, -0.5f, 0.5f, // Row 2, Column 1
- 0.0f, -0.125f, 0.125f // Row 2, Column 2
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ 0.0f, -0.5f, 0.5f, // Row 2, Column 1
+ 0.0f, -0.125f, 0.125f // Row 2, Column 2
};
+ std::vector<float> alpha_scales{1.f, 0.05f, 0.1f};
+ std::vector<int32_t> zerop{0, 0, 0};
Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data);
- Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3}, 0.1, 0, alpha_data);
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3, alpha_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0);
Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
+TEST(PreluTest, Input_Output_U8_CWQ_NEG)
+{
+ std::vector<float> scales{1.f, 1.f};
+ std::vector<int32_t> zerop{0, 0};
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
+ Tensor output_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PreluTest, Input_Output_S16_CWQ_NEG)
+{
+ std::vector<float> scales{1.f, 1.f};
+ std::vector<int32_t> zerop{0, 0};
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
+ Tensor output_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PreluTest, Mixing_U8_S16_NEG)
+{
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data);
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data);
+ Tensor output_tensor = makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data);
+
+ Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
params.output_shift = _output_shift;
params.quantized_activation_min =
- std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
{
const int32_t input_val = input_data[i];
int32_t output_val =
- tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
+ tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
output_val = std::max(output_val, output_min);
output_val = std::min(output_val, output_max);
output_data[i] = static_cast<int16_t>(output_val);
TEST(ReluTest, FloatSimple)
{
std::vector<float> input_data{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -1.0f, -2.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
};
std::vector<float> ref_output_data{
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, 0.0f, 0.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, 0.0f, 0.0f, // Row 2
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data);
TEST(ReluTest, Uint8Quantized)
{
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 7, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
};
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float f_min = (-128.0 / 128.0) * 8;
const float f_max = (127.0 / 128.0) * 8;
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Relu kernel(&input_tensor, &output_tensor);
TEST(ReluTest, Uint8Requantized)
{
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 7, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
};
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float out_max = (255.0 / 256.0) * 8;
std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first,
- quant_input.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, quant_input.second, input_data);
std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
TEST(ReluTest, SInt16)
{
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 7, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
};
std::vector<float> ref_output_data{
- 0, 0, 2, 4, //
- 3, 0, 7, 1, //
+ 0, 0, 2, 4, //
+ 3, 0, 7, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data);
params.output_shift = _output_shift;
params.quantized_activation_min =
- std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
params.quantized_activation_max =
- std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
- params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
+ std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
+ params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
getTensorShape(output()), getTensorData<uint8_t>(output()));
TEST(Relu6Test, FloatSimple)
{
std::vector<float> input_data{
- 0.0f, 1.0f, 3.0f, // Row 1
- 7.0f, -1.0f, -2.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 7.0f, -1.0f, -2.0f, // Row 2
};
std::vector<float> ref_output_data{
- 0.0f, 1.0f, 3.0f, // Row 1
- 6.0f, 0.0f, 0.0f, // Row 2
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 6.0f, 0.0f, 0.0f, // Row 2
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data);
const float tolerance = (f_max - f_min) / 255.0;
std::vector<float> input_data{
- 0, -6, 2, 8, //
- -2, 3, 7, 1, //
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first,
- quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Relu6 kernel(&input_tensor, &output_tensor);
const float tolerance = (in_max - in_min) / 255.0;
std::vector<float> input_data{
- 0, -6, 2, 8, //
- -2, 3, 7, 1, //
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
};
std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first,
- quant_input.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, quant_input.second, input_data);
std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
}
Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output)
- : Kernel({input, shape}, {output})
+ : Kernel({input, shape}, {output})
{
}
ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
const ResizeBilinearParams ¶ms)
- : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
+ : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
{
}
{
case DataType::FLOAT32:
tflite::optimized_ops::ResizeBilinear(
- op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
- getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
break;
case DataType::U8:
tflite::optimized_ops::ResizeBilinear(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
{
Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
},
{3, 3},
{
- 3, 5, 6, //
- 7, 9, 10, //
- 9, 11, 12, //
- 4, 8, 10, //
- 8, 12, 14, //
- 10, 14, 16, //
+ 3, 5, 6, //
+ 7, 9, 10, //
+ 9, 11, 12, //
+ 4, 8, 10, //
+ 8, 12, 14, //
+ 10, 14, 16, //
},
false, false);
SUCCEED();
{
Check<float>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 1, 2, //
- 3, 4, //
- 1, 2, //
- 3, 4 //
+ 1, 2, //
+ 3, 4, //
+ 1, 2, //
+ 3, 4 //
},
{3, 3},
{
- 1, 1.5, 2, //
- 2, 2.5, 3, //
- 3, 3.5, 4, //
- 1, 1.5, 2, //
- 2, 2.5, 3, //
- 3, 3.5, 4, //
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
},
false, true);
SUCCEED();
{
Check<uint8_t>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 12, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 12, 16 //
},
{3, 3},
{
- 2, 4, 6, //
- 6, 7, 9, //
- 9, 10, 12, //
- 4, 7, 10, //
- 8, 10, 13, //
- 12, 14, 16, //
+ 2, 4, 6, //
+ 6, 7, 9, //
+ 9, 10, 12, //
+ 4, 7, 10, //
+ 8, 10, 13, //
+ 12, 14, 16, //
},
false, true);
SUCCEED();
TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(ResizeBilinearTest, InvalidParams_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
Tensor *output,
const ResizeNearestNeighborParams ¶ms)
- : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
+ : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
{
}
{
case DataType::FLOAT32:
tflite::reference_ops::ResizeNearestNeighbor(
- op_params, getTensorShape(input()), getTensorData<int32_t>(input()),
- getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()),
- getTensorData<int32_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
break;
case DataType::U8:
tflite::optimized_ops::ResizeNearestNeighbor(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
bool half_pixel_centers)
{
std::pair<float, int32_t> quant_param =
- quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
- std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
+ makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
{
Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
},
{3, 3},
{
- 3, 3, 6, //
- 3, 3, 6, //
- 9, 9, 12, //
- 4, 4, 10, //
- 4, 4, 10, //
- 10, 10, 16, //
+ 3, 3, 6, //
+ 3, 3, 6, //
+ 9, 9, 12, //
+ 4, 4, 10, //
+ 4, 4, 10, //
+ 10, 10, 16, //
},
false, false);
}
{
Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
},
{3, 3},
{
- 3, 6, 6, //
- 9, 12, 12, //
- 9, 12, 12, //
- 4, 10, 10, //
- 10, 16, 16, //
- 10, 16, 16, //
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
},
true, false);
}
{
Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
{
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
},
{3, 3},
{
- 3, 6, 6, //
- 9, 12, 12, //
- 9, 12, 12, //
- 4, 10, 10, //
- 10, 16, 16, //
- 10, 16, 16, //
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
},
false, true);
}
TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG)
{
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
});
Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1});
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
{
Reverse::Reverse(const Tensor *input, const Tensor *axes, Tensor *output)
- : Kernel({input, axes}, {output})
+ : Kernel({input, axes}, {output})
{
}
break;
case DataType::U8:
tflite::reference_ops::Reverse<uint8_t>(
- axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
+ axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported output type");
TEST(RsqrtTest, SimpleRsqrt)
{
Check(
- /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
- /*input_data=*/
- {
- 5, 4, 8, 2, //
- 6, 7.5, 9, 0.3, //
- },
- /*output_data=*/
- {
- 0.44721360, 0.5, 0.35355339, 0.70710678, //
- 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
- });
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 5, 4, 8, 2, //
+ 6, 7.5, 9, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.44721360, 0.5, 0.35355339, 0.70710678, //
+ 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+ });
}
TEST(RsqrtTest, Input_Output_Type_NEG)
const int max_dim = 4;
Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
- : Kernel({input, begin, size}, {output})
+ : Kernel({input, begin, size}, {output})
{
}
{
Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams ¶ms)
- : KernelWithParams<SoftmaxParams>({input}, {output}, params)
+ : KernelWithParams<SoftmaxParams>({input}, {output}, params)
{
}
std::initializer_list<float> output_data)
{
std::pair<float, int32_t> input_quant_param =
- quantizationParams<uint8_t>(std::min<float>(std::min<float>(input_data), 0.f),
- std::max<float>(std::max<float>(input_data), 0.f));
+ quantizationParams<uint8_t>(std::min<float>(std::min<float>(input_data), 0.f),
+ std::max<float>(std::max<float>(input_data), 0.f));
std::pair<float, int32_t> output_quant_param =
- quantizationParams<uint8_t>(std::min<float>(std::min<float>(output_data), 0.f),
- std::max<float>(std::max<float>(output_data), 0.f));
+ quantizationParams<uint8_t>(std::min<float>(std::min<float>(output_data), 0.f),
+ std::max<float>(std::max<float>(output_data), 0.f));
Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
input_quant_param.second, input_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
SoftmaxParams params{};
params.beta = 0.1;
{
Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
{
- 5, -9, 8, //
- -7, 2, -4, //
- 1, -2, 9, //
- 3, -6, -1, //
+ 5, -9, 8, //
+ -7, 2, -4, //
+ 1, -2, 9, //
+ 3, -6, -1, //
},
{
- 0.38514, 0.09497, 0.51989, //
- 0.20792, 0.51141, 0.28067, //
- 0.25212, 0.18678, 0.56110, //
- 0.48149, 0.19576, 0.32275, //
+ 0.38514, 0.09497, 0.51989, //
+ 0.20792, 0.51141, 0.28067, //
+ 0.25212, 0.18678, 0.56110, //
+ 0.48149, 0.19576, 0.32275, //
});
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
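+// Only 3D and 4D inputs are handled: a batch dimension, one or two spatial dimensions, and a
+// trailing channel dimension.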
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+
+} // namespace
+
+SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
+ const Tensor *paddings, Tensor *output)
+ : Kernel({input, block_shape, paddings}, {output})
+{
+}
+
+void SpaceToBatchND::configure()
+{
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *paddings_data = paddings()->data<int32_t>();
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
+
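+  // Each padded spatial dimension is divided by its block size and the block factors are folded
+  // into the batch, e.g. input {1, 5, 2, 1} with block {3, 2} and paddings {1, 0, 2, 0} (the
+  // Simple test below) yields {1 * 3 * 2, (5 + 1) / 3, (2 + 2) / 2, 1} = {6, 2, 2, 1}.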
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ int final_dim_size =
+ (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
+ LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0);
+ output_shape.dim(i + 1) = final_dim_size / block_shape_data[i];
+ output_batch_size = output_batch_size * block_shape_data[i];
+ }
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+ output()->resize(output_shape);
+}
+
+void SpaceToBatchND::execute() const
+{
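+  // output_offset is the value written into padded positions: 0 for float tensors and the
+  // output zero point for quantized uint8 tensors (this is how tflite's SpaceToBatchND
+  // interprets it).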
+  tflite::SpaceToBatchParams op_params;
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+ op_params.output_offset = 0;
+ tflite::optimized_ops::SpaceToBatchND(
+ op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+ getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ op_params.output_offset = output()->zero_point();
+ tflite::optimized_ops::SpaceToBatchND(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+ getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SpaceToBatchND : public Kernel
+{
+public:
+ SpaceToBatchND(const Tensor *input, const Tensor *block_shape, const Tensor *paddings,
+ Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *paddings() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> paddings_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
+ Tensor paddings_tensor = makeInputTensor<DataType::S32>(paddings_shape, paddings_data);
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <>
+void Check<uint8_t>(
+ std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> paddings_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
+ input_quant_param.second, input_data);
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
+ Tensor paddings_tensor = makeInputTensor<DataType::S32>(paddings_shape, paddings_data);
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class SpaceToBatchNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(SpaceToBatchNDTest, DataTypes);
+
+TYPED_TEST(SpaceToBatchNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{1, 5, 2, 1}, /*block_shape_shape=*/{2},
+ /*paddings_shape=*/{2, 2},
+ /*output_shape=*/{6, 2, 2, 1},
+ /*input_data=*/{-1.0, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0},
+ /*block_shape_data=*/{3, 2}, /*paddings_data=*/{1, 0, 2, 0},
+ /*output_data=*/{0, 0, 0, -0.5, 0, 0, 0, 0.6, 0, -1.0, 0, -0.7,
+ 0, 0.2, 0, 0.8, 0, -0.3, 0, -0.9, 0, 0.4, 0, 1.0});
+}
+
+TEST(SpaceToBatchNDTest, Invalid_Shape_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
+ Tensor paddings_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
{
SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams ¶ms)
- : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
+ : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
{
}
{
Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
- : Kernel({axis, input}, std::move(outputs))
+ : Kernel({axis, input}, std::move(outputs))
{
}
Check<TypeParam>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
{
- {1, 2, 3, 4, 5, 6, 7, 8}, //
- {9, 10, 11, 12, 13, 14, 15, 16}, //
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16}, //
});
Check<TypeParam>(
- /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {
- {1, 2, 3, 4, 9, 10, 11, 12}, //
- {5, 6, 7, 8, 13, 14, 15, 16}, //
- });
+ /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 9, 10, 11, 12}, //
+ {5, 6, 7, 8, 13, 14, 15, 16}, //
+ });
Check<TypeParam>(
- /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {
- {1, 2, 5, 6, 9, 10, 13, 14}, //
- {3, 4, 7, 8, 11, 12, 15, 16}, //
- });
+ /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 5, 6, 9, 10, 13, 14}, //
+ {3, 4, 7, 8, 11, 12, 15, 16}, //
+ });
Check<TypeParam>(
- /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {
- {1, 3, 5, 7, 9, 11, 13, 15}, //
- {2, 4, 6, 8, 10, 12, 14, 16}, //
- });
+ /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 3, 5, 7, 9, 11, 13, 15}, //
+ {2, 4, 6, 8, 10, 12, 14, 16}, //
+ });
}
TYPED_TEST(SplitTest, OneDimensional)
{
Check<TypeParam>(
- /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
- {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
+ /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
+ {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
}
TYPED_TEST(SplitTest, NegativeAxis)
{
Check<TypeParam>(
- /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {
- {1, 2, 3, 4, 5, 6, 7, 8}, //
- {9, 10, 11, 12, 13, 14, 15, 16},
- });
+ /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16},
+ });
}
} // namespace
TEST(SqrtTest, SimpleSqrt)
{
Check(
- /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
- /*input_data=*/
- {
- 0, 8, 2, 4, //
- 3, 7, 10, 0.3, //
- },
- /*output_data=*/
- {
- 0.0, 2.8284271, 1.4142136, 2, //
- 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
- });
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, 8, 2, 4, //
+ 3, 7, 10, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.0, 2.8284271, 1.4142136, 2, //
+ 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+ });
}
TEST(SqrtTest, Input_Output_Type_NEG)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void SquaredDifference::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void SquaredDifference::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalSquaredDifference<float>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void SquaredDifference::evalSquaredDifference() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) {
+ const T difference = x - y;
+ return difference * difference;
+ });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+#define LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SquaredDifference : public Kernel
+{
+public:
+ SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalSquaredDifference() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquaredDifferenceTest, Float)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1);
+ Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
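+  // Element-wise (x - y)^2, e.g. (1.0 - (-1.0))^2 = 4.0 and (-1.44 - (-1.43))^2 = 0.0001.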
+ std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(SquaredDifferenceTest, FloatBroadcast)
+{
+ Shape input_shape1{3, 1, 2};
+ Shape input_shape2{1};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{1.0};
+ Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1);
+ Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
{
Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams ¶ms)
- : KernelWithParams<SqueezeParams>({input}, {output}, params)
+ : KernelWithParams<SqueezeParams>({input}, {output}, params)
{
}
TYPED_TEST(SqueezeTest, TotalTest)
{
Check<TypeParam>(
- /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
- /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
- {-1, 0});
+ /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ {-1, 0});
}
} // namespace
StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
const Tensor *strides, Tensor *output, const StridedSliceParams ¶ms)
- : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
+ : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
{
}
assert(stride != 0);
int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
int32_t end =
- ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
+ ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
if (shrink_axis)
{
Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms)
- : KernelWithParams<SubParams>({input1, input2}, {output}, params)
+ : KernelWithParams<SubParams>({input1, input2}, {output}, params)
{
}
params.float_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), ¶ms);
+ getTensorShape(input1()), getTensorShape(input2()), ¶ms);
if (need_broadcast)
{
tflite::reference_ops::BroadcastSubSlow(
- params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
- getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
params.quantized_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), ¶ms);
+ getTensorShape(input1()), getTensorShape(input2()), ¶ms);
if (need_broadcast)
{
tflite::reference_ops::BroadcastSubSlow(
- params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
- getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
else
{
vector<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
vector<vector<int32_t>> output_shapes = {{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
vector<vector<float>> output_data = {
- {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f,
- 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
- -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f},
- {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f},
- {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
- 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
- -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f},
- {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}};
+ {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f,
+ 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f},
+ {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f},
+ {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f},
+ {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}};
float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
for (size_t i = 0; i < output_data.size(); ++i)
{
Tensor input1_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
quant_param.second, test_data);
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
SubParams params{};
params.activation = Activation::NONE;
Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
quant_param.second, test_data);
Tensor input2_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+ makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
SubParams params{};
params.activation = Activation::NONE;
vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
vector<vector<int32_t>> output_shapes{{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
vector<vector<float>> test_outputs = {
- {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f,
- 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f},
- {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f},
- {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
- 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
- {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}};
+ {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f,
+ 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}};
vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
- << "With shape number " << i;
+ << "With shape number " << i;
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
}
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
kernel.execute();
std::vector<float> ref_output_data{
- 0, -0.9999877, 0.9640275, 0.999329, //
- 0.99505475, -0.9640275, 1, 0.7615941, //
+ 0, -0.9999877, 0.9640275, 0.999329, //
+ 0.99505475, -0.9640275, 1, 0.7615941, //
};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax);
std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax);
std::vector<float> input_data{
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first,
input_quant_param.second, input_data);
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
Tanh kernel(&input_tensor, &output_tensor);
kernel.configure();
kernel.execute();
std::vector<float> ref_output_data{
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
};
std::vector<int32_t> ref_output_shape{2, 6, 4, 1};
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data, kTanhTolerance));
TEST(TanhTest, InputTypeInvalid_NEG)
{
std::vector<int64_t> input_data{
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data);
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
TEST(TanhTest, InputOutputMismatch_NEG)
{
std::vector<float> input_data{
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
};
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data);
Tensor output_tensor = makeOutputTensor(DataType::U8);
float scale = tensor.scales()[channel];
size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
std::vector<float> part_dequantized_data =
- dequantize(data.data() + offset, inner_dims_size, scale, 0);
+ dequantize(data.data() + offset, inner_dims_size, scale, 0);
dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(),
part_dequantized_data.end());
}
using NativeT = typename DataTypeImpl<DT>::Type;
Tensor tensor(DT, shape, {{scale}, {zero_point}}, "");
std::vector<NativeT> quantized_data =
- quantize<NativeT>(data.data(), data.size(), scale, zero_point);
+ quantize<NativeT>(data.data(), data.size(), scale, zero_point);
tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
return tensor;
}
float scale = scales[channel];
size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
std::vector<NativeT> part_quantized_data =
- quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point);
+ quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point);
quantized_data.insert(quantized_data.end(), part_quantized_data.begin(),
part_quantized_data.end());
}
{
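// Affine quantization of one element: q = clamp(round(f / scale + zero_point)) into [q_min, q_max].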
const auto &f = data[i];
q.push_back(static_cast<T>(
- std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale))))));
+ std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale))))));
}
return q;
}
const float zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale);
const float zero_point_double = zero_point_from_min_error < zero_point_from_max_error
- ? zero_point_from_min
- : zero_point_from_max;
+ ? zero_point_from_min
+ : zero_point_from_max;
// Now we need to nudge the zero point to be an integer
// (our zero points are integer, and this is motivated by the requirement
{
Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output)
- : Kernel({input, perm}, {output})
+ : Kernel({input, perm}, {output})
{
}
TYPED_TEST(TransposeTest, Large4D)
{
Check<TypeParam>(
- /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5},
- /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
- /*perm_data=*/{2, 0, 1, 3},
- /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
- 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
- 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
- 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
- 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
- 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
- 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
- 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119});
+ /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{2, 0, 1, 3},
+ /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
+ 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+ 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
+ 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+ 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
+ 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+ 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
+ 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119});
}
TYPED_TEST(TransposeTest, Large2D)
{
Check<TypeParam>(
- /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10},
- /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
- /*perm_data=*/{1, 0},
- /*output_data=*/{
- 0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49, 61, 73, 85, 97, 109,
- 2, 14, 26, 38, 50, 62, 74, 86, 98, 110, 3, 15, 27, 39, 51, 63, 75, 87, 99, 111,
- 4, 16, 28, 40, 52, 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113,
- 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55, 67, 79, 91, 103, 115,
- 8, 20, 32, 44, 56, 68, 80, 92, 104, 116, 9, 21, 33, 45, 57, 69, 81, 93, 105, 117,
- 10, 22, 34, 46, 58, 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119});
+ /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{1, 0},
+ /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49,
+ 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110,
+ 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52,
+ 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113,
+ 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55,
+ 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116,
+ 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58,
+ 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119});
}
} // namespace
TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
const Tensor *bias, Tensor *output, const TransposeConvParams &params)
- : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
{
}
const int32_t output_width = out_shape.dim(2);
const int32_t unused_output_height =
- computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
+ computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
const int32_t unused_output_width =
- computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
+ computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
_padding_height =
- computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
+ computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
_padding_width =
- computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
+ computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
{
DataType scratch_data_type =
- input()->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ input()->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
_scratch_tensor =
- std::make_unique<Tensor>(scratch_data_type, output()->shape(), AffineQuantization{}, "");
+ std::make_unique<Tensor>(scratch_data_type, output()->shape(), AffineQuantization{}, "");
const std::vector<double> real_multipliers =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
_quant_multipliers = quantizeMultipliers(real_multipliers);
}
for (int32_t out_c = 0; out_c < output_depth; ++out_c)
{
const uint8_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
const uint8_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
- static_cast<int32_t>(input_val - input()->zero_point()) *
- static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
}
}
}
}
int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
scaled_acc += output()->zero_point();
scaled_acc = std::max(scaled_acc, activation_min);
for (int32_t out_c = 0; out_c < output_depth; ++out_c)
{
const int16_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
const int16_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
- static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
}
}
}
acc += bias_data[out_c];
}
int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
scaled_acc = std::max(scaled_acc, activation_min);
scaled_acc = std::min(scaled_acc, activation_max);
{
constexpr DataType element_type = getElementType<T>();
Tensor output_shape_tensor =
- makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data);
+ makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data);
Tensor weight_tensor = makeInputTensor<element_type>(weight_shape, weight_data);
Tensor input_data_tensor = makeInputTensor<element_type>(input_shape, input_data);
Tensor output_tensor = makeOutputTensor(element_type);
TEST(TransposeConvTest, FloatSimple)
{
Check<float, float>(
- /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
- /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
- /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
- /*bias_data=*/{},
- /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
- /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*bias_data=*/{},
+ /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
SUCCEED();
}
TEST(TransposeConvTest, FloatTwoFiltersTest)
{
Check<float, float>(
- /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
- /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
- /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
- 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
- /*bias_data=*/{},
- /*output_data=*/
- {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760},
- /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ /*bias_data=*/{},
+ /*output_data=*/
+ {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
SUCCEED();
}
TEST(TransposeConvTest, SimpleBiasTest)
{
Check<float, float>(
- /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
- /*input_shape=*/{1, 2, 2, 1},
- /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2},
- /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
- /*input_data=*/{1, 2, 3, 4},
- /*bias_data=*/{3, 4},
- /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
- 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
- 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
- /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2);
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+ /*input_shape=*/{1, 2, 2, 1},
+ /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2},
+ /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+ /*input_data=*/{1, 2, 3, 4},
+ /*bias_data=*/{3, 4},
+ /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
+ 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
+ 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+ /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2);
SUCCEED();
}
std::vector<float> bias_data{3, 4};
std::vector<int32_t> output_shape_data{1, 5, 5, 2};
std::vector<float> ref_output_data{
- 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
- 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
- 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
- 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
- 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
};
// Choose quantization parameters carefully.
auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96
auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
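// For reference: assuming quantizationParams uses the usual affine mapping for uint8
// (scale = (max - min) / 255, zero_point = round(-min / scale)), the ranges above yield
// s = 0.25, zp = 96 for the filter and s = 1.0, zp = 64 for the output, matching the comments.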
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first,
- input_quant.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, input_quant.second, input_data);
Tensor filter_tensor = makeInputTensor<DataType::U8>({2, 3, 3, 1}, filter_quant.first,
filter_quant.second, filter_data);
Tensor bias_tensor =
- makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, 0, bias_data);
+ makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, 0, bias_data);
Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
std::vector<float> bias_data{3, 4};
std::vector<int32_t> output_shape_data{1, 5, 5, 2};
std::vector<float> ref_output_data{
- 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
- 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
- 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
- 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
- 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
};
// Choose quantization parameters carefully.
bias_scales.push_back(filter_quant_params[i].first * input_quant.first);
std::vector<int32_t> zerop(output_channels, 0);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first,
- input_quant.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, input_quant.second, input_data);
Tensor filter_tensor = makeInputTensor<DataType::U8>({output_channels, 3, 3, 1}, filter_scales,
filter_zerops, 0, filter_data);
Tensor bias_tensor =
- makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
+ makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
std::vector<float> bias_data{3, 4};
std::vector<int32_t> output_shape_data{1, 5, 5, 2};
std::vector<float> ref_output_data{
- 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
- 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
- 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
- 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
- 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
};
Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data);
std::vector<float> bias_data{3, 4};
std::vector<float> ref_output_data{
- 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
- 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
- 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
- 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
- 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
};
const float input_scale = 0.25;
Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
Tensor filter_tensor =
- makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
+ makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
{
Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
- : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
+ : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
{
}
TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs)
{
Check<TypeParam>(
- /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
- /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
- /*exp_output_data=*/
- {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
+ /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
+ /*exp_output_data=*/
+ {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
}
TYPED_TEST(UnpackTest, VectorToScalar)
return input_product_scale / static_cast<double>(output_scale);
}
+// TODO Rename getQuantizedConvolutionMultiplers to something more general;
+//      it is used for non-conv operators too.
inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
const std::vector<float> &filter_scale,
float output_scale)
for (size_t i = 0; i < n; ++i)
{
effective_output_scales.push_back(
- getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
+ getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
}
return effective_output_scales;
}
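// BroadcastableWrapper lets a single-element vector be indexed as if it had any length:
// _stride is 0 for a size-1 vector (every index reads element 0) and 1 otherwise.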
BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
T operator[](int idx) { return _v[idx * _stride]; }
+
private:
const std::vector<T> &_v;
int _stride;
// Build with the tensors in 'tensor_list'.
explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
- : VectorOfTensors<uint8_t, is_const>(tensor_list)
+ : VectorOfTensors<uint8_t, is_const>(tensor_list)
{
for (TensorT *tensor : tensor_list)
{
-nnas_find_package(GTest REQUIRED)
-
set(SOURCES
GraphLoader.h
GraphLoader.cpp
PUBLIC luci_lang luci_interpreter_core
PRIVATE luci_interpreter_kernels nncc_common)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
set(TEST_SOURCES KernelBuilder.test.cpp)
GTest_AddTest(luci_interpreter_loader_test ${TEST_SOURCES})
} // namespace
GraphLoader::GraphLoader(
- const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
- _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+ const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
+ _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
{
}
#include "kernels/Add.h"
#include "kernels/ArgMax.h"
#include "kernels/AveragePool2D.h"
+#include "kernels/BatchToSpaceND.h"
#include "kernels/Concatenation.h"
#include "kernels/Conv2D.h"
#include "kernels/DepthToSpace.h"
#include "kernels/Mean.h"
#include "kernels/Minimum.h"
#include "kernels/Mul.h"
+#include "kernels/Neg.h"
#include "kernels/NotEqual.h"
+#include "kernels/Pack.h"
#include "kernels/Pad.h"
#include "kernels/Pow.h"
#include "kernels/Prelu.h"
#include "kernels/Rsqrt.h"
#include "kernels/Slice.h"
#include "kernels/Softmax.h"
+#include "kernels/SpaceToBatchND.h"
#include "kernels/SpaceToDepth.h"
#include "kernels/Split.h"
#include "kernels/StridedSlice.h"
#include "kernels/Sqrt.h"
-#include "kernels/Sub.h"
+#include "kernels/SquaredDifference.h"
#include "kernels/Squeeze.h"
+#include "kernels/Sub.h"
#include "kernels/Tanh.h"
#include "kernels/Unpack.h"
#include "kernels/Transpose.h"
return runtime_graph;
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleNode *)
+{
+ throw std::invalid_argument("Unsupported operator.");
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleAdd *node)
{
assert(node->arity() == 2);
return std::make_unique<kernels::AveragePool2D>(input, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleBatchToSpaceND *node)
+{
+ assert(node->arity() == 3);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *block_shape = getInputTensor(node->block_shape());
+ const Tensor *crops = getInputTensor(node->crops());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleConcatenation *node)
{
std::vector<const Tensor *> inputs(node->numValues());
ConcatenationParams params{};
params.axis = node->axis();
+ params.activation = node->fusedActivationFunction();
return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
}
return std::make_unique<kernels::Mul>(input1, input2, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleNeg *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->x());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Neg>(input, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleNotEqual *node)
{
assert(node->arity() == 2);
throw std::runtime_error("Output node cannot be executed.");
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CirclePack *node)
+{
+ assert(node->arity() == node->values_count());
+
+ std::vector<const Tensor *> inputs(node->values_count());
+ for (uint32_t i = 0; i < node->values_count(); ++i)
+ {
+ inputs[i] = getInputTensor(node->values(i));
+ }
+ Tensor *output = getOutputTensor(node);
+
+ PackParams params{};
+ params.axis = node->axis();
+ params.values_count = node->values_count();
+
+ return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CirclePad *node)
{
assert(node->arity() == 2);
return std::make_unique<kernels::Rsqrt>(input, output);
}
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSub *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- SubParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Sub>(input1, input2, output, params);
-}
-
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node)
{
assert(node->arity() == 3);
return std::make_unique<kernels::Softmax>(input, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSpaceToBatchND *node)
+{
+ assert(node->arity() == 3);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *block_shape = getInputTensor(node->block_shape());
+ const Tensor *paddings = getInputTensor(node->paddings());
+
+ Tensor *output = getOutputTensor(node);
+
+  return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSpaceToDepth *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::Sqrt>(input, output);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSquaredDifference *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = getInputTensor(node->x());
+ const Tensor *input2 = getInputTensor(node->y());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSub *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = getInputTensor(node->x());
+ const Tensor *input2 = getInputTensor(node->y());
+ Tensor *output = getOutputTensor(node);
+
+ SubParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Sub>(input1, input2, output, params);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTanh *node)
{
assert(node->arity() == 1);
{
public:
KernelBuilder(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
{
}
+ std::unique_ptr<Kernel> visit(const luci::CircleNode *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleAdd *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleArgMax *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleAveragePool2D *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleBatchToSpaceND *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleConcatenation *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleConv2D *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleConst *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleMean *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleMinimum *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleMul *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleNeg *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleNotEqual *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleOutput *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CirclePack *node) override;
std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override;
std::unique_ptr<Kernel> visit(const luci::CirclePow *node) override;
std::unique_ptr<Kernel> visit(const luci::CirclePRelu *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleResizeNearestNeighbor *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleRsqrt *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleSub *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSpaceToBatchND *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleStridedSlice *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSqrt *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSquaredDifference *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSqueeze *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSub *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleTanh *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleTranspose *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleTransposeConv *node) override;
#include <kernels/Mean.h>
#include <kernels/Minimum.h>
#include <kernels/Mul.h>
+#include <kernels/Neg.h>
#include <kernels/NotEqual.h>
#include <kernels/Pad.h>
#include <kernels/Pow.h>
#include <kernels/SpaceToDepth.h>
#include <kernels/Split.h>
#include <kernels/Sqrt.h>
-#include <kernels/Sub.h>
+#include <kernels/SquaredDifference.h>
#include <kernels/Squeeze.h>
#include <kernels/StridedSlice.h>
+#include <kernels/Sub.h>
#include <kernels/Tanh.h>
#include <kernels/Transpose.h>
#include <kernels/TransposeConv.h>
checkTensor(kernel->input(1), input2);
checkTensor(kernel->output(), op);
EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
TEST_F(KernelBuilderTest, Conv2D)
EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
+TEST_F(KernelBuilderTest, Neg)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleNeg>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Neg>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, NotEqual)
{
auto *x_input = createInputNode();
checkTensor(kernel->output(), op);
}
-TEST_F(KernelBuilderTest, Sub)
+TEST_F(KernelBuilderTest, SquaredDifference)
{
auto *input1 = createInputNode();
auto *input2 = createInputNode();
- auto *op = createNode<luci::CircleSub>();
+ auto *op = createNode<luci::CircleSquaredDifference>();
op->x(input1);
op->y(input2);
- op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
- auto kernel = buildKernel<kernels::Sub>(op);
+ auto kernel = buildKernel<kernels::SquaredDifference>(op);
ASSERT_THAT(kernel, NotNull());
checkTensor(kernel->input1(), input1);
checkTensor(kernel->input2(), input2);
checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
TEST_F(KernelBuilderTest, Squeeze)
EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
}
+TEST_F(KernelBuilderTest, Sub)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleSub>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Sub>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
TEST_F(KernelBuilderTest, Tanh)
{
auto *input = createInputNode();
ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
RuntimeToIR &runtime_to_ir,
std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
- _node_to_tensor(node_to_tensor)
+ : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
+ _node_to_tensor(node_to_tensor)
{
}
--- /dev/null
+/test.local.lst
--- /dev/null
+unset(TEST_DEPS)
+unset(LUCI_PASS_VALUE_TESTS)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+macro(addeval RECIPE PASS_OPTION)
+ list(APPEND LUCI_PASS_VALUE_TESTS ${RECIPE})
+
+ set(CIRCLE_FILE "${RECIPE}.circle")
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${CIRCLE_FILE}")
+
+ set(PASS_CIRCLE_FILE "${RECIPE}.pass.circle")
+ set(PASS_CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PASS_CIRCLE_FILE}")
+
+ set(DASH_PASS_OPTION "--${PASS_OPTION}")
+
+ # Generate optimized .circle
+ add_custom_command(OUTPUT ${PASS_CIRCLE_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:circle2circle> ${DASH_PASS_OPTION} ${CIRCLE_PATH} ${PASS_CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_PATH}
+ COMMENT "Generate ${PASS_CIRCLE_FILE} with ${DASH_PASS_OPTION}"
+ )
+
+ # depends
+ list(APPEND TEST_DEPS ${PASS_CIRCLE_OUTPUT_PATH})
+
+endmacro(addeval)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+add_custom_target(luci_pass_value_test_files ALL DEPENDS ${TEST_DEPS})
+add_dependencies(luci_pass_value_test_files common_artifacts_deps)
+
+add_test(NAME luci_pass_value_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_3_0"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_PASS_VALUE_TESTS}
+)
--- /dev/null
+# luci-pass-value-test
+
+`luci-pass-value-test` validates the execution result values of a tflite model against those of
+the corresponding circle model generated with a specific optimization.
+
+The test proceeds as follows:
+
+Step 0: Use the tflite and circle files in the 'common-artifacts' folder as the source models.
+  - the tflite file is used to generate the reference execution result
+  - the circle file is the source to which the optimization is applied
+
+Step 1: Run circle2circle with the given optimization option to produce the transformed circle.
+  - "modelfile.circle" -> circle2circle -> "modelfile.pass.circle"
+
+Step 2: Run the TFLite interpreter and luci-interpreter for the source tflite and the transformed
+        circle, respectively (with the same input tensors filled with random values).
+  - "modelfile.tflite" ------> TFLite interpreter -> Execution result 1
+  - "modelfile.pass.circle" -> luci-interpreter ---> Execution result 2
+
+Step 3: Compare execution results 1 and 2. The test PASSES if the results are the same.
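+
+For one recipe, Step 1 boils down to a single circle2circle invocation; a rough sketch of the
+flow (paths here are illustrative, the real commands are generated by CMakeLists.txt and
+eval_driver.sh) looks like this:
+
+```sh
+# Step 1: produce the optimized circle with the requested pass
+circle2circle --fuse_batchnorm_with_tconv Net_TConv_BN_000.circle Net_TConv_BN_000.pass.circle
+
+# Steps 2-3: eval_driver.sh calls eval_result_verifier.py, which feeds the same random inputs
+# to the TFLite interpreter (source tflite) and to luci_eval_driver (.pass.circle) and then
+# compares the outputs with np.allclose.
+```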
--- /dev/null
+#!/bin/bash
+
+# This script verifies the tflite and circle execution result values
+#
+# HOW TO USE
+#
+# ./eval_driver.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/intp_dir>
+# <TEST 1> <TEST 2> ...
+# bin_dir : build directory of luci-pass-value-test (ex: build/compiler/luci-pass-value-test)
+# work_dir : artifacts directory where test materials exist
+# venv_dir : python virtual environment home directory
+# intp_dir : path to the luci_eval_driver binary from luci-eval-driver
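+#
+# Example (illustrative paths):
+#   ./eval_driver.sh build/compiler/luci-pass-value-test build/compiler/common-artifacts \
+#                    overlay/venv_2_3_0 build/compiler/luci-eval-driver/luci_eval_driver \
+#                    Net_TConv_BN_000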
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/eval_result_verifier.py"
+BINDIR="$1"; shift
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_TFLITE_FILE="${WORKDIR}/${TESTCASE}.tflite"
+ TESTCASE_CIRCLE_FILE="${BINDIR}/${TESTCASE}.pass.circle"
+ TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ source "${VIRTUALENV}/bin/activate"
+
+ "${VIRTUALENV}/bin/python" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${INTERPRETER_DRIVER_PATH}" \
+ --tflite "${TESTCASE_TFLITE_FILE}" \
+ --circle "${TESTCASE_CIRCLE_FILE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
--- /dev/null
+#!/usr/bin/env python3
+import numpy as np
+import tensorflow as tf
+import subprocess
+import argparse
+import traceback
+
+#
+# This script was copied from luci-value-test; the input arguments are the tflite and circle paths
+#
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--tflite', type=str, required=True)
+parser.add_argument('--circle', type=str, required=True)
+args = parser.parse_args()
+
+driver = args.driver
+tflite_model = args.tflite
+circle_model = args.circle
+
+# Build TFLite interpreter.
+interpreter = tf.lite.Interpreter(tflite_model)
+interpreter.allocate_tensors()
+
+# Generate random input data.
+num_inputs = len(interpreter.get_input_details())
+for i in range(num_inputs):
+ input_details = interpreter.get_input_details()[i]
+ if input_details["dtype"] == np.float32:
+ input_data = np.array(
+ np.random.random_sample(input_details["shape"]), input_details["dtype"])
+ elif input_details["dtype"] == np.uint8:
+ input_data = np.array(
+ np.random.randint(0, 256, size=input_details["shape"]),
+ input_details["dtype"])
+ elif input_details["dtype"] == np.bool_:
+ input_data = np.array(
+ np.random.choice(a=[True, False], size=input_details["shape"]),
+ input_details["dtype"])
+ else:
+ raise SystemExit("Unsupported input dtype")
+
+ interpreter.set_tensor(input_details["index"], input_data)
+ input_data.tofile(circle_model + ".input" + str(i))
+
+# Do inference
+interpreter.invoke()
+
+# Execute luci interpreter.
+subprocess.run(
+ [
+ driver, circle_model,
+ str(num_inputs), circle_model + ".input", circle_model + ".output"
+ ],
+ check=True)
+
+# Compare the results.
+for idx in range(len(interpreter.get_output_details())):
+ output_details = interpreter.get_output_details()[idx]
+ output_data = np.fromfile(circle_model + ".output" + str(idx),
+ output_details["dtype"])
+ shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ luci_output_data = np.reshape(output_data, output_shape)
+    try:
+        # Integer outputs must match exactly; float32 outputs are compared with a small tolerance.
+        if output_details["dtype"] in (np.uint8, np.int32, np.int64):
+            rtol, atol = 0, 0
+        elif output_details["dtype"] == np.float32:
+            rtol, atol = 1.e-5, 1.e-5
+        else:
+            raise SystemExit("Unsupported data type: ", output_details["dtype"])
+        intp_output_data = interpreter.get_tensor(output_details["index"])
+        if not np.allclose(luci_output_data, intp_output_data, rtol=rtol, atol=atol):
+            raise SystemExit("Execution result of " + tflite_model +
+                             " does not match with " + circle_model)
+    except:
+        print(traceback.format_exc())
+        quit(255)
+
+quit(0)
--- /dev/null
+require("common-artifacts")
+require("luci-interpreter")
+require("safemain")
+require("oops")
+require("loco")
+require("luci-value-test")
+require("luci-eval-driver")
--- /dev/null
+#
+# Format:
+# addeval(MODEL PASS)
+# MODEL: tflite model file name in build/compiler/common-artifacts folder.
+# PASS: Optimization Pass to test. Supports only one Pass for now.
+#
+
+# addeval(Net_Preactivation_BN_000 fuse_preactivation_batchnorm) : value diff exists
+# --> https://github.com/Samsung/ONE/issues/5782
+addeval(Net_Conv_Add_Mul_000 fuse_batchnorm_with_conv)
+addeval(Net_Conv_Add_Mul_001 fuse_batchnorm_with_conv)
+addeval(Net_Conv_Add_Mul_002 fuse_batchnorm_with_conv)
+addeval(Net_Conv_Min_Max_000 transform_min_max_to_relu6)
+addeval(Net_Conv_Relu6_000 fuse_activation_function)
+addeval(Net_DwConv_BN_000 fuse_batchnorm_with_dwconv)
+addeval(Net_DwConv_BN_001 fuse_batchnorm_with_dwconv)
+addeval(Net_Reshape_Neg_000 forward_reshape_to_unaryop)
+addeval(Net_Reshape_Reshape_000 remove_redundant_reshape)
+addeval(Net_Squeeze_Squeeze_000 substitute_squeeze_to_reshape)
+addeval(Net_TConv_Add_000 fuse_add_with_tconv)
+addeval(Net_TConv_Add_001 fuse_add_with_tconv)
+addeval(Net_TConv_Add_002 fuse_add_with_tconv)
+addeval(Net_TConv_BN_000 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_001 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_002 fuse_batchnorm_with_tconv)
+addeval(Net_InstanceNorm_001 fuse_instnorm)
+addeval(Net_InstanceNorm_002 fuse_instnorm)
+addeval(Net_InstanceNorm_003 fuse_instnorm)
+addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice)
--- /dev/null
+/test.local.lst
# Generate dependencies
add_custom_target(luci_eval_testfiles ALL DEPENDS ${TESTFILES})
-add_subdirectory(tester)
-
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
add_test(NAME luci_value_test
"${CMAKE_CURRENT_BINARY_DIR}"
"${ARTIFACTS_BIN_PATH}"
"${NNCC_OVERLAY_DIR}/venv_2_3_0"
+ "$<TARGET_FILE:luci_eval_driver>"
${LUCI_VALUE_TESTS}
)
BINDIR="$1"; shift
WORKDIR="$1"; shift
VIRTUALENV="$1"; shift
-INTERPRETER_DRIVER_PATH="${BINDIR}/tester/luci_eval_tester"
+INTERPRETER_DRIVER_PATH="$1"; shift
TESTED=()
PASSED=()
# This script compares the execution result of luci-interpreter with that of TFLite interpreter
#
# Basic usage:
-# eval_verifier.py --driver build/compiler/luci-value-test/tester/luci_eval_tester
+# eval_verifier.py --driver build/compiler/luci-eval-driver/luci_eval_driver
# --model inception_v3
parser = argparse.ArgumentParser()
parser.add_argument('--driver', type=str, required=True)
require("safemain")
require("oops")
require("loco")
+require("luci-eval-driver")
#addeval(Square_000)
#addeval(SquaredDifference_000)
addeval(Squeeze_000)
+addeval(Squeeze_001)
addeval(StridedSlice_000)
addeval(StridedSlice_001)
addeval(StridedSlice_002)
+++ /dev/null
-
-set(SRCS_EVAL_TESTER
- src/EvalTester.cpp
- )
-
-add_executable(luci_eval_tester ${SRCS_EVAL_TESTER})
-target_link_libraries(luci_eval_tester PRIVATE oops)
-target_link_libraries(luci_eval_tester PRIVATE loco)
-target_link_libraries(luci_eval_tester PRIVATE luci_import)
-target_link_libraries(luci_eval_tester PRIVATE luci_export)
-target_link_libraries(luci_eval_tester PRIVATE luci_lang)
-target_link_libraries(luci_eval_tester PRIVATE luci_interpreter)
-target_link_libraries(luci_eval_tester PRIVATE safemain)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <luci/Importer.h>
-#include <luci_interpreter/Interpreter.h>
-#include <luci/CircleExporter.h>
-#include <luci/CircleFileExpContract.h>
-
-#include <cstdlib>
-#include <fstream>
-#include <iostream>
-#include <vector>
-#include <map>
-#include <string>
-#include <random>
-
-namespace
-{
-
-void readDataFromFile(const std::string &filename, char *data, size_t data_size)
-{
- std::ifstream fs(filename, std::ifstream::binary);
- if (fs.fail())
- throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
- if (fs.read(data, data_size).fail())
- throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
-}
-
-void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
-{
- std::ofstream fs(filename, std::ofstream::binary);
- if (fs.fail())
- throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
- if (fs.write(data, data_size).fail())
- {
- throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
- }
-}
-
-std::unique_ptr<luci::Module> importModel(const std::string &filename)
-{
- std::ifstream fs(filename, std::ifstream::binary);
- if (fs.fail())
- {
- throw std::runtime_error("Cannot open model file \"" + filename + "\".\n");
- }
- std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
- std::istreambuf_iterator<char>());
- return luci::Importer().importModule(circle::GetModel(model_data.data()));
-}
-
-template <typename NodeT> size_t getTensorSize(const NodeT *node)
-{
- uint32_t tensor_size = loco::size(node->dtype());
- for (uint32_t i = 0; i < node->rank(); ++i)
- tensor_size *= node->dim(i).value();
- return tensor_size;
-}
-
-} // namespace
-
-/*
- * @brief EvalTester main
- *
- * Driver for testing luci-inerpreter
- *
- */
-int entry(int argc, char **argv)
-{
- if (argc != 5)
- {
- std::cerr
- << "Usage: " << argv[0]
- << " <path/to/circle/model> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
- return EXIT_FAILURE;
- }
-
- const char *filename = argv[1];
- const int32_t num_inputs = atoi(argv[2]);
- const char *input_prefix = argv[3];
- const char *output_file = argv[4];
- const std::string intermediate_filename = std::string(filename) + ".inter.circle";
-
- // Load model from the file
- std::unique_ptr<luci::Module> initial_module = importModel(filename);
- if (initial_module == nullptr)
- {
- std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- // Export to a Circle file
- luci::CircleExporter exporter;
-
- luci::CircleFileExpContract contract(initial_module.get(), intermediate_filename);
-
- if (!exporter.invoke(&contract))
- {
- std::cerr << "ERROR: Failed to export '" << intermediate_filename << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- // Import model again
- std::unique_ptr<luci::Module> module = importModel(intermediate_filename);
- if (module == nullptr)
- {
- std::cerr << "ERROR: Failed to load '" << intermediate_filename << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- // Create interpreter.
- luci_interpreter::Interpreter interpreter(module.get());
-
- // Set input.
- // Data for n'th input is read from ${input_prefix}n
- // (ex: Add.circle.input0, Add.circle.input1 ..)
- const auto input_nodes = loco::input_nodes(module->graph());
- assert(num_inputs == input_nodes.size());
- for (int32_t i = 0; i < num_inputs; i++)
- {
- const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
- std::vector<char> input_data(getTensorSize(input_node));
- readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
- input_data.size());
- interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
- }
-
- // Do inference.
- interpreter.interpret();
-
- // Get output.
- const auto output_nodes = loco::output_nodes(module->graph());
- for (int i = 0; i < module->graph()->outputs()->size(); i++)
- {
- const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
- std::vector<char> output_data(getTensorSize(output_node));
- interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
-
- // Output data is written in ${output_file}
- // (ex: Add.circle.output0)
- // Output shape is written in ${output_file}.shape
- // (ex: Add.circle.output0.shape)
- writeDataToFile(std::string(output_file) + std::to_string(i), output_data.data(),
- output_data.size());
- // In case of Tensor output is Scalar value.
- // The output tensor with rank 0 is treated as a scalar with shape (1)
- if (output_node->rank() == 0)
- {
- writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", "1", 1);
- }
- else
- {
- auto shape_str = std::to_string(output_node->dim(0).value());
- for (int j = 1; j < output_node->rank(); j++)
- {
- shape_str += ",";
- shape_str += std::to_string(output_node->dim(j).value());
- }
- writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", shape_str.c_str(),
- shape_str.size());
- }
- }
- return EXIT_SUCCESS;
-}
add_subdirectory(env)
add_subdirectory(log)
add_subdirectory(lang)
+add_subdirectory(testhelper)
add_subdirectory(service)
add_subdirectory(pass)
+add_subdirectory(profile)
+add_subdirectory(partition)
add_subdirectory(logex)
add_subdirectory(import)
add_subdirectory(export)
Undefined,
MuteWarnings,
DisableValidation,
+ ProfilingDataGen,
};
static UserSettings *settings();
private:
bool _MuteWarnings{false};
bool _DisableValidation{false};
+ bool _ProfilingDataGen{false};
};
void UserSettingsImpl::set(const Key key, bool value)
case Key::DisableValidation:
_DisableValidation = value;
break;
+ case Key::ProfilingDataGen:
+ _ProfilingDataGen = value;
+ break;
default:
throw std::runtime_error("Invalid key in boolean set");
break;
return _MuteWarnings;
case Key::DisableValidation:
return _DisableValidation;
+ case Key::ProfilingDataGen:
+ return _ProfilingDataGen;
default:
throw std::runtime_error("Invalid key in boolean get");
break;
ASSERT_TRUE(settings->get(luci::UserSettings::Key::DisableValidation));
}
+TEST(UserSettings, ProfilingDataGen)
+{
+ auto settings = luci::UserSettings::settings();
+ ASSERT_NE(nullptr, settings);
+
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, false);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::ProfilingDataGen));
+
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
+ ASSERT_TRUE(settings->get(luci::UserSettings::Key::ProfilingDataGen));
+}
+
TEST(UserSettings, undefined_set_NEG)
{
auto settings = luci::UserSettings::settings();
target_link_libraries(luci_export PRIVATE luci_env)
target_link_libraries(luci_export PRIVATE luci_log)
target_link_libraries(luci_export PRIVATE luci_logex)
+target_link_libraries(luci_export PRIVATE luci_profile)
target_link_libraries(luci_export PRIVATE nncc_common)
target_link_libraries(luci_export PRIVATE locop)
target_link_libraries(luci_export PRIVATE oops)
{
public:
CircleFileExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
+ : _module(module), _filepath(filename)
{
// NOTHING TO DO
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleExportMetadata.h"
+
+#include <luci/UserSettings.h>
+
+namespace
+{
+
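+// Appends 'value' to 'to' in little-endian byte order.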
+void write_u32(std::vector<uint8_t> &to, uint32_t value)
+{
+ to.emplace_back(0xFF & (value >> 0 * 8));
+ to.emplace_back(0xFF & (value >> 1 * 8));
+ to.emplace_back(0xFF & (value >> 2 * 8));
+ to.emplace_back(0xFF & (value >> 3 * 8));
+}
+
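+// Registers 'data' as a new buffer in the serialized model and returns a Metadata entry
+// named 'metadata_name' that points at that buffer.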
+flatbuffers::Offset<circle::Metadata> metadata_offset(flatbuffers::FlatBufferBuilder &builder,
+ luci::SerializedModelData &md,
+ const std::vector<uint8_t> &data,
+ const std::string &metadata_name)
+{
+ auto buffer_id = static_cast<uint32_t>(md._buffers.size());
+ md._buffers.push_back(circle::CreateBufferDirect(builder, &data));
+ return circle::CreateMetadataDirect(builder, metadata_name.c_str(), buffer_id);
+}
+
+} // namespace
+
+namespace luci
+{
+
+// 'source_table' is encoded to binary format.
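+//
+// Encoded layout (all integers are u32 written via write_u32):
+//   [number of entries]
+//   per entry: [id] [name length including the trailing '\0'] [name bytes] ['\0']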
+const std::vector<uint8_t> CircleExportMetadata::encoded_source_table(void)
+{
+ std::vector<uint8_t> data;
+
+ write_u32(data, _source_table.size());
+
+ for (auto &kv : _source_table)
+ {
+ const auto id = kv.first;
+ write_u32(data, id);
+
+ const auto origin_name = kv.second;
+ const auto length = origin_name.length();
+    write_u32(data, length + 1); // name + '\0'
+
+ for (uint32_t i = 0; i < length; ++i)
+ {
+ data.emplace_back(origin_name.at(i));
+ }
+ data.emplace_back('\0');
+ }
+
+ return data;
+}
+
+// 'op_table' is encoded to binary format.
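+//
+// Encoded layout (all integers are u32 written via write_u32):
+//   [number of entries]
+//   per entry: [id] [number of origins] [origin id ...]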
+const std::vector<uint8_t> CircleExportMetadata::encoded_op_table(void)
+{
+ std::vector<uint8_t> data;
+
+ write_u32(data, _op_table.size());
+
+ for (auto &kv : _op_table)
+ {
+ const auto id = kv.first;
+ write_u32(data, id);
+
+ const auto origins = kv.second;
+ const auto node_num = origins.size();
+ write_u32(data, node_num);
+
+ for (auto origin : origins)
+ {
+ write_u32(data, origin);
+ }
+ }
+
+ return data;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+std::vector<flatbuffers::Offset<circle::Metadata>>
+createCircleMetadataVector(flatbuffers::FlatBufferBuilder &builder, luci::SerializedModelData &md)
+{
+ std::vector<flatbuffers::Offset<circle::Metadata>> metadata_vec;
+
+ auto settings = luci::UserSettings::settings();
+ if (settings->get(luci::UserSettings::Key::ProfilingDataGen))
+ {
+ metadata_vec.emplace_back(
+ metadata_offset(builder, md, md._metadata.encoded_source_table(), "ONE_source_table"));
+
+ metadata_vec.emplace_back(
+ metadata_offset(builder, md, md._metadata.encoded_op_table(), "ONE_op_table"));
+ }
+
+ return metadata_vec;
+}
+
+} // namespace luci
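The encoded tables are plain little-endian uint32 streams. As a worked illustration (not part of the patch), these are the bytes that encoded_source_table() would produce for a single entry {3, "conv"}, following write_u32() above:

#include <cstdint>
#include <vector>

// Illustration only: layout is [entry count] then, per entry,
// [source id][name length + 1][name bytes]['\0'], all u32 little-endian.
std::vector<uint8_t> expected_source_table_bytes(void)
{
  return {
    0x01, 0x00, 0x00, 0x00,  // entry count = 1
    0x03, 0x00, 0x00, 0x00,  // source id   = 3
    0x05, 0x00, 0x00, 0x00,  // length      = 4 ("conv") + 1 ('\0')
    'c',  'o',  'n',  'v',  '\0'
  };
}

encoded_op_table() follows the same pattern: [entry count] then, per entry, [node id][number of source ids][source ids...].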
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_EXPORT_METADATA_H__
+#define __LUCI_CIRCLE_EXPORT_METADATA_H__
+
+#include "SerializedData.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Create Metadata corresponding to model metadata
+ */
+std::vector<flatbuffers::Offset<circle::Metadata>>
+createCircleMetadataVector(flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md);
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_EXPORT_METADATA_H__
#include "CircleExporterImpl.h"
#include "Optimize.h"
+#include "CircleExportMetadata.h"
#include "CircleTensorExporter.h"
#include "CircleOperationExporter.h"
#include "CircleExporterUtils.h"
+#include <luci/IR/CircleNodes.h>
+
#include <oops/InternalExn.h>
#include <mio/circle/schema_generated.h>
#include <flatbuffers/flatbuffers.h>
#include <cassert>
#include <unordered_map>
#include <string>
-#include <stdexcept>
+#include <vector>
namespace
{
-luci::CircleInput *input_node(loco::Graph *g, const loco::GraphInputIndex &index)
-{
- for (uint32_t n = 0; n < g->nodes()->size(); ++n)
- {
- if (auto input = dynamic_cast<luci::CircleInput *>(g->nodes()->at(n)))
- {
- if (input->indexed() && input->index() == index)
- {
- return input;
- }
- }
- }
- return nullptr;
-}
-
-luci::CircleOutput *output_node(loco::Graph *g, const loco::GraphOutputIndex &index)
-{
- for (uint32_t n = 0; n < g->nodes()->size(); ++n)
- {
- if (auto output = dynamic_cast<luci::CircleOutput *>(g->nodes()->at(n)))
- {
- if (output->indexed() && output->index() == index)
- {
- return output;
- }
- }
- }
- return nullptr;
-}
-
void registerGraphInputTensors(loco::Graph *graph, luci::SubGraphContext &ctx)
{
for (uint32_t n = 0; n < graph->inputs()->size(); ++n)
{
- auto node = input_node(graph, n);
+ auto node = luci::input_node(graph, n);
assert(node != nullptr);
ctx._inputs.push_back(luci::get_tensor_index(node));
}
{
for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
{
- auto push = output_node(graph, n);
+ auto push = luci::output_node(graph, n);
assert(push != nullptr);
auto node = push->from();
assert(node != nullptr);
else
{
operator_codes_vec[idx] =
- CreateOperatorCode(builder, it.first.opcode, builder.CreateString(it.first.custom_code));
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(it.first.custom_code));
}
}
std::string description_str = "nnpackage";
auto description = _builder.CreateString(description_str);
+ // Metadata
+ auto metadata_vec = createCircleMetadataVector(_builder, md);
+ auto metadata = _builder.CreateVector(std::vector<Offset<Metadata>>(metadata_vec));
+
// create array of buffers
auto buffers = _builder.CreateVector(md._buffers);
- // empty metadata
- std::vector<int> metadata_buffer_vec;
- auto metadata_buffer = _builder.CreateVector(metadata_buffer_vec);
-
// Model
auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
- buffers, metadata_buffer);
+ buffers, 0 /* metadata_buffer */, metadata);
FinishModelBuffer(_builder, model_offset);
}
std::string description_str = "nnpackage";
auto description = _builder.CreateString(description_str);
+ // Metadata
+ auto metadata_vec = createCircleMetadataVector(_builder, md);
+ auto metadata = _builder.CreateVector(std::vector<Offset<Metadata>>(metadata_vec));
+
// create array of buffers
auto buffers = _builder.CreateVector(md._buffers);
- // empty metadata
- std::vector<int> metadata_buffer_vec;
- auto metadata_buffer = _builder.CreateVector(metadata_buffer_vec);
-
// This version is taken from comment in fbs
constexpr uint32_t version = 0;
// Model
auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
- buffers, metadata_buffer);
+ buffers, 0 /* metadata_buffer */, metadata);
FinishModelBuffer(_builder, model_offset);
}
#include "SerializedData.h"
-#include "SerializedData.h"
-
#include <mio/circle/schema_generated.h>
#include <loco.h>
//
// NOTE input and output 'feature' map are shape of NHWC
bool same_padding_criterion_1 =
- (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
- (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
// For same padding, rear padding is same or bigger than front padding by at most 1
bool same_padding_criterion_2 =
- (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
- (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
if (same_padding_criterion_1 && same_padding_criterion_2)
return circle::Padding_SAME;
#include <luci/IR/CircleNode.h>
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/UserSettings.h>
#include <luci/Log.h>
void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op)
{
LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
- builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+ builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
+ builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
"Should be L2Pool, MaxPool or AvgPool");
LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset)
{
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+ ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
std::vector<int32_t> inputs_vec;
std::vector<int32_t> outputs_vec{get_tensor_index(node)};
for (uint32_t i = 0; i < node->arity(); ++i)
void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop)
{
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+ ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
std::vector<int32_t> inputs_vec;
std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
for (uint32_t i = 0; i < node->arity(); ++i)
void export_node(ExportContext &ctx, luci::CircleConcatenation *node)
{
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
std::vector<int32_t> inputs_vec;
std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
void export_node(ExportContext &ctx, luci::CircleCustom *node)
{
auto custom_outputs = loco::succs(node);
+ assert(custom_outputs.size() == node->numOutputs());
uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code());
std::vector<int32_t> inputs_vec;
uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4,
node->op_version());
std::vector<int32_t> inputs_vec{
- get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
- get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
- get_tensor_index(node->score_threshold()),
+ get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
+ get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+ get_tensor_index(node->score_threshold()),
};
std::vector<int32_t> outputs_vec;
auto outputs = ctx.builder.CreateVector(outputs_vec);
auto options = CreateNonMaxSuppressionV4Options(ctx.builder);
auto op_offset =
- CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
+ CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
ctx.gd._operators.push_back(op_offset);
}
uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5,
node->op_version());
std::vector<int32_t> inputs_vec{
- get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
- get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
- get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
+ get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
+ get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+ get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
};
std::vector<int32_t> outputs_vec;
auto outputs = ctx.builder.CreateVector(outputs_vec);
auto options = CreateNonMaxSuppressionV5Options(ctx.builder);
auto op_offset =
- CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
+ CreateOperator(ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
ctx.gd._operators.push_back(op_offset);
}
void export_node(ExportContext &ctx, luci::CircleReverseV2 *node)
{
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
auto inputs = ctx.builder.CreateVector(inputs_vec);
assert(int32_t(split_outs.size()) == node->num_split());
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
get_tensor_index(node->size_splits()),
get_tensor_index(node->split_dim())};
assert(outs_count == 2);
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
std::vector<int32_t> outputs_vec;
auto unique_outs = loco::succs(node);
assert(int32_t(unique_outs.size()) == 2);
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
std::vector<int32_t> outputs_vec;
}
uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
+ ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
std::vector<int32_t> outputs_vec;
void visit(luci::CircleAveragePool2D *) final;
void visit(luci::CircleBatchMatMul *) final;
void visit(luci::CircleBatchToSpaceND *) final;
+ void visit(luci::CircleBidirectionalSequenceLSTM *) final;
void visit(luci::CircleCast *) final;
void visit(luci::CircleCeil *) final;
void visit(luci::CircleConcatenation *) final;
void visit(luci::CircleEqual *) final;
void visit(luci::CircleExp *) final;
void visit(luci::CircleExpandDims *) final;
+ void visit(luci::CircleFakeQuant *) final;
void visit(luci::CircleFill *) final;
void visit(luci::CircleFloor *) final;
void visit(luci::CircleFloorDiv *) final;
void visit(luci::CircleOutputDummy *) final {}
void visit(luci::CircleOutputExclude *) final {}
// Virtual for multiple-outputs
+ void visit(luci::CircleBidirectionalSequenceLSTMOut *) final {}
void visit(luci::CircleCustomOut *) final {}
void visit(luci::CircleIfOut *) final {}
void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
void OperationExporter::visit(luci::CircleAdd *node)
{
export_simple(
- node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
- CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
+ CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleAddN *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleArgMax *node)
{
export_simple(
- node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
- CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
+ node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
+ CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
}
void OperationExporter::visit(luci::CircleArgMin *node)
{
export_simple(
- node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
- CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
+ node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
+ CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
}
void OperationExporter::visit(luci::CircleAveragePool2D *node)
CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union());
}
+void OperationExporter::visit(luci::CircleBidirectionalSequenceLSTM *node)
+{
+ auto bidi_lstm_outs = loco::succs(node);
+ assert((bidi_lstm_outs.size() == 1) || (bidi_lstm_outs.size() == 2));
+ uint32_t op_idx = _ctx.md.registerBuiltinOpcode(
+ circle::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, node->op_version());
+
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < 2; index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : bidi_lstm_outs)
+ {
+ auto bidi_lstm_out = loco::must_cast<luci::CircleBidirectionalSequenceLSTMOut *>(out);
+ if (bidi_lstm_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(bidi_lstm_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid BidirectionalSequenceLSTM output");
+ }
+ }
+
+ auto inputs = _ctx.builder.CreateVector(inputs_vec);
+ auto outputs = _ctx.builder.CreateVector(outputs_vec);
+ auto options = CreateBidirectionalSequenceLSTMOptions(
+ _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(),
+ node->proj_clip(), node->merge_outputs(), node->time_major(),
+ node->asymmetric_quantize_inputs());
+ auto op_offset =
+ CreateOperator(_ctx.builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_BidirectionalSequenceLSTMOptions, options.Union());
+ _ctx.gd._operators.push_back(op_offset);
+}
+
void OperationExporter::visit(luci::CircleCast *node) { export_node(_ctx, node); }
void OperationExporter::visit(luci::CircleCeil *node)
node->stride()->w(), node->stride()->h(),
to_circle_actfunc(node->fusedActivationFunction()),
node->dilation()->w(), node->dilation()->h())
- .Union());
+ .Union());
}
void OperationExporter::visit(luci::CircleCos *node)
void OperationExporter::visit(luci::CircleDepthwiseConv2D *node)
{
- export_simple(node, circle::BuiltinOperator_DEPTHWISE_CONV_2D,
- circle::BuiltinOptions_DepthwiseConv2DOptions,
- CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
- node->stride()->w(), node->stride()->h(),
- node->depthMultiplier(),
- to_circle_actfunc(node->fusedActivationFunction()),
- node->dilation()->w(), node->dilation()->h())
- .Union());
+ export_simple(
+ node, circle::BuiltinOperator_DEPTHWISE_CONV_2D, circle::BuiltinOptions_DepthwiseConv2DOptions,
+ CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()), node->stride()->w(),
+ node->stride()->h(), node->depthMultiplier(),
+ to_circle_actfunc(node->fusedActivationFunction()),
+ node->dilation()->w(), node->dilation()->h())
+ .Union());
}
void OperationExporter::visit(luci::CircleDequantize *node)
void OperationExporter::visit(luci::CircleDiv *node)
{
export_simple(
- node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
- CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
+ CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleElu *node)
CreateExpandDimsOptions(_ctx.builder).Union());
}
+void OperationExporter::visit(luci::CircleFakeQuant *node)
+{
+ export_simple(node, circle::BuiltinOperator_FAKE_QUANT, circle::BuiltinOptions_FakeQuantOptions,
+ CreateFakeQuantOptions(_ctx.builder, node->min(), node->max(), node->num_bits(),
+ node->narrow_range())
+ .Union());
+}
+
void OperationExporter::visit(luci::CircleFill *node)
{
export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions,
void OperationExporter::visit(luci::CircleFullyConnected *node)
{
export_simple(
- node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
- CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
- to_circle_weightsformat(node->weights_format()))
- .Union());
+ node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
+ CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
+ to_circle_weightsformat(node->weights_format()))
+ .Union());
}
void OperationExporter::visit(luci::CircleGather *node)
void OperationExporter::visit(luci::CircleL2Normalize *node)
{
export_simple(
- node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
- CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
+ node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
+ CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleL2Pool2D *node)
circle::BuiltinOptions_LocalResponseNormalizationOptions,
CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(),
node->alpha(), node->beta())
- .Union());
+ .Union());
}
void OperationExporter::visit(luci::CircleLog *node)
void OperationExporter::visit(luci::CircleMirrorPad *node)
{
export_simple(
- node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
- CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
+ node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
+ CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
}
void OperationExporter::visit(luci::CircleMul *node)
{
export_simple(
- node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
- CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
+ CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleNeg *node)
void OperationExporter::visit(luci::CircleReshape *node)
{
auto new_shape = _ctx.builder.CreateVector<int32_t>(
- node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
+ node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions,
CreateReshapeOptions(_ctx.builder, new_shape).Union());
void OperationExporter::visit(luci::CircleResizeBilinear *node)
{
export_simple(
- node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
- CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
- .Union());
+ node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
+ CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
+ .Union());
}
void OperationExporter::visit(luci::CircleResizeNearestNeighbor *node)
void OperationExporter::visit(luci::CircleReverseSequence *node)
{
export_simple(
- node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
- CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
+ node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
+ CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
}
void OperationExporter::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); }
CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(),
node->ellipsis_mask(), node->new_axis_mask(),
node->shrink_axis_mask())
- .Union());
+ .Union());
}
void OperationExporter::visit(luci::CircleSub *node)
{
export_simple(
- node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
- CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+ node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
+ CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
}
void OperationExporter::visit(luci::CircleSum *node)
circle::BuiltinOptions_TransposeConvOptions,
CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()),
node->stride()->w(), node->stride()->h())
- .Union());
+ .Union());
}
void OperationExporter::visit(luci::CircleUnidirectionalSequenceLSTM *node)
export_simple(node, circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions,
CreateUnidirectionalSequenceLSTMOptions(
- _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
- node->cell_clip(), node->proj_clip(), node->time_major(),
- node->asymmetric_quantize_inputs())
- .Union());
+ _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
+ node->cell_clip(), node->proj_clip(), node->time_major(),
+ node->asymmetric_quantize_inputs())
+ .Union());
}
void OperationExporter::visit(luci::CircleUnique *node) { export_node(_ctx, node); }
circle::BuiltinOptions_BCQFullyConnectedOptions,
CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(),
to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
+ .Union());
}
void OperationExporter::visit(luci::CircleBCQGather *node)
{
export_simple(
- node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
- CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
+ node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
+ CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
}
void OperationExporter::visit(luci::CircleInstanceNorm *node)
circle::BuiltinOptions_InstanceNormOptions,
CreateInstanceNormOptions(_ctx.builder, node->epsilon(),
to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
+ .Union());
}
void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
{
ExportContext ctx{builder, md, gd};
OperationExporter exporter{ctx};
+
+ const auto ops_size = gd._operators.size();
+
circle_node->accept(&exporter);
+ if (has_origin(circle_node) && ops_size != gd._operators.size())
+ {
+ const auto node_id = gd._operators.size() - 1;
+ for (auto source : get_origin(circle_node)->sources())
+ {
+ md._metadata.add_source_table(source->id(), source->name());
+ md._metadata.add_op_table(node_id, source->id());
+ }
+ }
}
else
{
*/
#include "CircleTensorExporter.h"
-#include "TypeBridge.h"
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/IR/CircleShapeSignature.h>
#include <luci/Service/CircleTypeInference.h>
#include <luci/Service/CircleShapeInference.h>
#include <luci/Log.h>
using namespace luci;
-class CircleTensoInfo
+class CircleTensorInfo
{
public:
- CircleTensoInfo() = default;
+ CircleTensorInfo() = default;
public:
void name(const std::string &name) { _name = name; }
const ShapeDescription &shape(void) const { return _shape; }
void shape(const ShapeDescription &shape) { _shape = shape; }
- const ShapeSignature &shape_signature(void) const { return _shape_signature; }
- void shape_signature(const ShapeSignature &ss) { _shape_signature = ss; }
-
luci::ShapeStatus shape_status(void) const { return _shape_status; }
void shape_status(luci::ShapeStatus ss) { _shape_status = ss; }
circle::TensorType _dtype{circle::TensorType_FLOAT32};
ShapeDescription _shape{};
- ShapeSignature _shape_signature;
luci::ShapeStatus _shape_status{luci::ShapeStatus::UNDEFINED};
luci::CircleConst *_content = nullptr;
luci::SparsityParam *_sparsityparam = nullptr;
};
-using CircleTensorContext = std::vector<CircleTensoInfo>;
+class CircleTensorContext
+{
+public:
+ CircleTensorContext() = default;
+
+public:
+ void emplace_back(CircleTensorInfo &ti)
+ {
+ assert(_names.find(ti.name()) == _names.end());
+ _tis.emplace_back(ti);
+ _names.insert(ti.name());
+ }
+ size_t size(void) const { return _tis.size(); }
+ std::vector<CircleTensorInfo>::iterator begin(void) { return _tis.begin(); }
+ std::vector<CircleTensorInfo>::iterator end(void) { return _tis.end(); }
+
+public:
+ bool exist(const std::string &name) const { return _names.find(name) != _names.end(); }
+
+private:
+ std::vector<CircleTensorInfo> _tis;
+ std::set<std::string> _names;
+};
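A short sketch of how the new CircleTensorContext is meant to be used; it relies on the file-local CircleTensorInfo/CircleTensorContext types above, and the helper name register_tensor is hypothetical (the real call site is the tensor-export routine shown below):

#include <string>

// Sketch: emplace_back() asserts that each tensor name is registered once,
// so duplicates must be renamed before insertion (mirroring the logic below).
void register_tensor(CircleTensorContext &ctx, const std::string &name)
{
  std::string tensor_name = name;
  if (ctx.exist(tensor_name))
    tensor_name += "_" + std::to_string(ctx.size()); // make the name unique

  CircleTensorInfo info;
  info.name(tensor_name);
  ctx.emplace_back(info);
}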
struct NoOpDetector final : public luci::CircleNodeMutableVisitor<bool>
{
auto tensor_index = static_cast<CircleTensorIndex>(ctx.size());
// TODO Use Graph-level metadata for Input & Output
- // auto tensor_name = "t_" + std::to_string(tensor_index);
std::string tensor_name = node->name();
- if (tensor_name.empty())
- tensor_name = "t_" + std::to_string(tensor_index);
+ // NOTE tensor_name may be empty; this assertion will alert when that happens.
+ //      Currently we require every tensor to have a name.
+ // TODO If this breaks, fix the cause or permit an empty tensor_name.
+ assert(!tensor_name.empty());
+ if (ctx.exist(tensor_name))
+ {
+ // NOTE make the tensor name unique by appending the tensor index.
+ tensor_name = tensor_name + "_" + std::to_string(tensor_index);
+ assert(!ctx.exist(tensor_name));
+ }
INFO(l) << "[luci] Tensor for " << tensor_name << ": " << tensor_index << std::endl;
- CircleTensoInfo tensor_info;
+ CircleTensorInfo tensor_info;
tensor_info.name(tensor_name);
tensor_info.dtype(to_circle_tensortype(node->dtype()));
- tensor_info.shape_signature(node->shape_signature());
if (node->shape_status() == ShapeStatus::VALID)
tensor_info.shape(to_shape_description(node));
tensor_info.shape_status(node->shape_status());
}
public:
+ bool visit(luci::CircleBidirectionalSequenceLSTMOut *) final { return true; }
+ bool visit(luci::CircleCustomOut *) final { return true; }
bool visit(luci::CircleIfOut *) final { return true; }
+ bool visit(luci::CircleNonMaxSuppressionV4Out *) final { return true; }
+ bool visit(luci::CircleNonMaxSuppressionV5Out *) final { return true; }
bool visit(luci::CircleSplitOut *) final { return true; }
bool visit(luci::CircleSplitVOut *) final { return true; }
bool visit(luci::CircleTopKV2Out *) final { return true; }
bool visit(luci::CircleUnpackOut *) final { return true; }
+ bool visit(luci::CircleUniqueOut *) final { return true; }
bool visit(luci::CircleWhileOut *) final { return true; }
+ bool visit(luci::CircleBidirectionalSequenceLSTM *node) final
+ {
+ if (node->merge_outputs())
+ {
+ store_outputs(node, 1);
+ }
+ else
+ {
+ store_outputs(node, 2);
+ }
+ return true;
+ }
+
+ bool visit(luci::CircleCustom *node) final
+ {
+ store_outputs(node, node->numOutputs());
+ return true;
+ }
+
bool visit(luci::CircleIf *node) final
{
store_outputs(node, node->output_count());
return true;
}
+ bool visit(luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ store_outputs(node, 2);
+ return true;
+ }
+
+ bool visit(luci::CircleNonMaxSuppressionV5 *node) final
+ {
+ store_outputs(node, 3);
+ return true;
+ }
+
bool visit(luci::CircleSplit *node) final
{
store_outputs(node, uint32_t(node->num_split()));
return true;
}
+ bool visit(luci::CircleUnique *node) final
+ {
+ store_outputs(node, 2);
+ return true;
+ }
+
bool visit(luci::CircleWhile *node) final
{
store_outputs(node, node->output_count());
const ShapeDescription &shape)
{
assert(shape._rank_known && "unknown number of dimensions is not supported");
- return builder.CreateVector(shape._dims);
+
+ std::vector<int32_t> encoded_shape;
+ encoded_shape.resize(shape._dims.size());
+ for (uint32_t i = 0; i < shape._dims.size(); ++i)
+ encoded_shape.at(i) = shape._dims.at(i) == -1 ? 1 : shape._dims.at(i);
+
+ return builder.CreateVector(encoded_shape);
}
flatbuffers::Offset<Vector<int32_t>> encodeShapeSignature(FlatBufferBuilder &builder,
- const ShapeSignature &shape_signature)
+ const ShapeDescription &shape)
{
- if (shape_signature.rank() == 0)
- return 0;
+ assert(shape._rank_known && "unknown number of dimensions is not supported");
+
+ // shape_signature is set if and only if at least one of the dimensions is unknown.
+ for (uint32_t i = 0; i < shape._dims.size(); ++i)
+ if (shape._dims.at(i) == -1)
+ return builder.CreateVector(shape._dims);
- return builder.CreateVector(shape_signature.as_vector());
+ return flatbuffers::Offset<Vector<int32_t>>();
}
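The division of labor between encodeShape and encodeShapeSignature is: the shape field never carries -1 (unknown dimensions are exported as 1), and shape_signature is emitted only when at least one dimension is unknown. A flatbuffers-free sketch of that rule, for illustration only:

#include <cstdint>
#include <vector>

struct EncodedDims
{
  std::vector<int32_t> shape;           // -1 replaced by 1
  std::vector<int32_t> shape_signature; // empty when all dims are known
};

EncodedDims encode_dims(const std::vector<int32_t> &dims)
{
  EncodedDims out;
  bool has_unknown = false;
  for (auto d : dims)
  {
    out.shape.push_back(d == -1 ? 1 : d);
    has_unknown = has_unknown || (d == -1);
  }
  if (has_unknown)
    out.shape_signature = dims; // keep original dims, including -1
  return out;
}

// e.g. encode_dims({1, -1, 224, 3}).shape           == {1, 1, 224, 3}
//      encode_dims({1, -1, 224, 3}).shape_signature == {1, -1, 224, 3}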
flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder)
// array_segments
auto circle_array_segments = to_circle_sparse_index_vector(builder, it.array_segments());
auto circle_array_segments_type =
- to_circle_sparse_index_vector_type(it.array_segments().type());
+ to_circle_sparse_index_vector_type(it.array_segments().type());
// array_indices
auto circle_array_indices = to_circle_sparse_index_vector(builder, it.array_indices());
auto circle_array_indices_type = to_circle_sparse_index_vector_type(it.array_indices().type());
auto dim_metadata = circle::CreateDimensionMetadata(
- builder, to_circle_dimensiontype(it.format()), it.dense_size(), circle_array_segments_type,
- circle_array_segments, circle_array_indices_type, circle_array_indices);
+ builder, to_circle_dimensiontype(it.format()), it.dense_size(), circle_array_segments_type,
+ circle_array_segments, circle_array_indices_type, circle_array_indices);
dim_metadata_vec.emplace_back(dim_metadata);
}
&sparsityparam->block_map, &dim_metadata_vec);
}
+template <loco::DataType DT> bool has_same_elements(luci::CircleConst *lhs, luci::CircleConst *rhs)
+{
+ assert(lhs->dtype() == DT);
+ assert(rhs->dtype() == DT);
+ assert(lhs->size<DT>() == rhs->size<DT>());
+
+ for (uint32_t i = 0; i < lhs->size<DT>(); ++i)
+ if (lhs->at<DT>(i) != rhs->at<DT>(i))
+ return false;
+ return true;
+}
+
bool has_same_values(luci::CircleConst *lhs, luci::CircleConst *rhs)
{
if (lhs->dtype() != rhs->dtype())
switch (lhs->dtype())
{
case loco::DataType::FLOAT32:
- for (uint32_t i = 0; i < lhs->size<loco::DataType::FLOAT32>(); ++i)
- if (lhs->at<loco::DataType::FLOAT32>(i) != rhs->at<loco::DataType::FLOAT32>(i))
- return false;
- break;
+ return has_same_elements<loco::DataType::FLOAT32>(lhs, rhs);
+
+ case loco::DataType::S8:
+ return has_same_elements<loco::DataType::S8>(lhs, rhs);
+
+ case loco::DataType::S16:
+ return has_same_elements<loco::DataType::S16>(lhs, rhs);
case loco::DataType::S32:
- for (uint32_t i = 0; i < lhs->size<loco::DataType::S32>(); ++i)
- if (lhs->at<loco::DataType::S32>(i) != rhs->at<loco::DataType::S32>(i))
- return false;
- break;
+ return has_same_elements<loco::DataType::S32>(lhs, rhs);
case loco::DataType::S64:
- for (uint32_t i = 0; i < lhs->size<loco::DataType::S64>(); ++i)
- if (lhs->at<loco::DataType::S64>(i) != rhs->at<loco::DataType::S64>(i))
- return false;
- break;
+ return has_same_elements<loco::DataType::S64>(lhs, rhs);
+
+ case loco::DataType::U8:
+ return has_same_elements<loco::DataType::U8>(lhs, rhs);
case loco::DataType::BOOL:
- for (uint32_t i = 0; i < lhs->size<loco::DataType::BOOL>(); ++i)
- if (lhs->at<loco::DataType::BOOL>(i) != rhs->at<loco::DataType::BOOL>(i))
- return false;
- break;
+ return has_same_elements<loco::DataType::BOOL>(lhs, rhs);
default:
- return false;
+ break;
}
- return true;
+ return false;
}
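has_same_elements centralizes the per-type comparison that has_same_values previously inlined, and it now also covers S8, S16 and U8. The exporter presumably uses has_same_values to deduplicate constant buffers; a minimal sketch of that idea (the cache parameter stands in for SerializedModelData::_cached_buffer_id and the helper name is hypothetical):

#include <cstdint>
#include <map>

// Sketch: reuse an existing buffer id when an identical constant was already
// exported; otherwise register the node with the next available id.
uint32_t deduplicated_buffer_id(std::map<luci::CircleConst *, uint32_t> &cache,
                                luci::CircleConst *node, uint32_t next_id)
{
  for (auto &kv : cache)
  {
    if (has_same_values(kv.first, node))
      return kv.second; // identical content already has a buffer
  }
  cache[node] = next_id; // first occurrence of this content
  return next_id;
}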
uint32_t get_buffer_id(FlatBufferBuilder &builder, SerializedModelData &md, luci::CircleConst *node)
}
}
-void exportOpDefinedTensor(const CircleTensoInfo &info, FlatBufferBuilder &builder,
+void exportOpDefinedTensor(const CircleTensorInfo &info, FlatBufferBuilder &builder,
SerializedModelData &md, SerializedGraphData &gd)
{
// Create and register output tensor shape
flatbuffers::Offset<Vector<int32_t>> shape_offset;
+ flatbuffers::Offset<Vector<int32_t>> shape_signature_offset;
if (info.shape_status() == ShapeStatus::VALID)
+ {
shape_offset = encodeShape(builder, info.shape());
+ shape_signature_offset = encodeShapeSignature(builder, info.shape());
+ }
auto quantparam = encodeQuantizationParameters(builder, info.quantparam());
auto sparsityparam = encodeSparsityParameters(builder, info.sparsityparam());
- auto shape_signature_offset = encodeShapeSignature(builder, info.shape_signature());
-
auto buffer_id = get_buffer_id(builder, md, info.content());
auto name_offset = builder.CreateString(info.name());
auto tensor_offset =
- CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, quantparam,
- /*is_variable*/ false, sparsityparam, shape_signature_offset);
+ CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, quantparam,
+ /*is_variable*/ false, sparsityparam, shape_signature_offset);
gd._tensors.push_back(tensor_offset);
}
#include "Optimize.h"
#include "ProgressReporter.h"
-#include <luci/Pass/ShapeInferencePass.h>
-#include <luci/Pass/ShapeSignatureInferencePass.h>
-#include <luci/Pass/TypeInferencePass.h>
+#include <luci/Pass/CircleShapeInferencePass.h>
+#include <luci/Pass/CircleTypeInferencePass.h>
#include <logo/Phase.h>
logo::Phase phase;
{
// prepare type and shape before optimization
- phase.emplace_back(std::make_unique<TypeInferencePass>());
- phase.emplace_back(std::make_unique<ShapeInferencePass>());
- phase.emplace_back(std::make_unique<ShapeSignatureInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
// TODO add more optimization passes (with a knob)
}
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
}
};
+class CircleExportMetadata
+{
+public:
+ void add_source_table(uint32_t source_id, std::string origin_name)
+ {
+ // A model with multiple subgraphs may have different origin_name values
+ // even if the source_id is the same. However, as profiling does not
+ // consider multiple subgraphs for now, we ignore those cases here;
+ // correct support will be added in the future.
+ _source_table.emplace(source_id, origin_name);
+ }
+
+ void add_op_table(uint32_t node_id, uint32_t source_id)
+ {
+ // A model with multiple subgraphs may have duplicated node ids.
+ // For now, as profiling does not consider multiple subgraphs,
+ // we ignore those cases; support will be added in the future.
+ if (_op_table.find(node_id) == _op_table.end())
+ _op_table.emplace(node_id, std::set<uint32_t>());
+ _op_table.at(node_id).emplace(source_id);
+ }
+
+public:
+ const std::vector<uint8_t> encoded_source_table(void);
+ const std::vector<uint8_t> encoded_op_table(void);
+
+private:
+ std::map<uint32_t, std::string> _source_table;
+ std::map<uint32_t, std::set<uint32_t>> _op_table;
+};
+
} // namespace luci
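Taken together with the exporter hook added above (has_origin/get_origin after accept()), the intended flow is: each time an operator is emitted, its (node id, source id) pairs and source names are recorded into CircleExportMetadata, and createCircleMetadataVector() later serializes them. A hedged sketch of that recording step in isolation (ids and names are made up):

// Sketch only: record one exported operator's origins.
void record_origin_example(luci::CircleExportMetadata &metadata)
{
  const uint32_t node_id = 7; // index of the operator just pushed to _operators

  // The operator was fused from two original (pre-optimization) nodes.
  metadata.add_source_table(1, "conv2d_1");
  metadata.add_source_table(2, "relu_1");
  metadata.add_op_table(node_id, 1);
  metadata.add_op_table(node_id, 2);
}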
namespace std
std::unordered_map<OpCode, uint32_t> _operator_codes;
std::vector<flatbuffers::Offset<circle::Buffer>> _buffers;
+ CircleExportMetadata _metadata;
// This is used for removing buffers with same values
std::map<luci::CircleConst *, uint32_t> _cached_buffer_id;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TypeBridge.h"
-
-#include "CircleExporterUtils.h"
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleTypeInference.h>
-#include <luci/Service/CircleShapeInference.h>
-
-#include <loco/Service/TypeInference.h>
-#include <loco/Service/ShapeInference.h>
-
-namespace
-{
-
-/**
- * @brief CopySelector will return condition of copy shape/type inference to node
- */
-struct CopySelector final : public luci::CircleNodeVisitor<bool>
-{
- // return false(don't copy) for nodes that provides shape/type from nature
- bool visit(const luci::CircleInput *) final { return false; }
- bool visit(const luci::CircleConst *) final { return false; }
-
- // default is copy attributes
- bool visit(const luci::CircleNode *) { return true; }
-};
-
-} // namespace
-
-namespace luci
-{
-
-loco::TensorShape node_shape(CircleNode *node)
-{
- loco::TensorShape shape;
-
- shape.rank(node->rank());
- for (uint32_t r = 0; r < node->rank(); ++r)
- {
- shape.dim(r) = loco::Dimension(node->dim(r).value());
- }
- return shape;
-}
-
-loco::DataType node_dtype(CircleNode *node) { return node->dtype(); }
-
-void copy_shape_dtype(loco::Graph *graph)
-{
- /**
- * @note We will iterate all the nodes in the graph to include dangle nodes
- */
- auto nodes = graph->nodes();
- for (uint32_t n = 0; n < nodes->size(); ++n)
- {
- auto node = loco::must_cast<luci::CircleNode *>(nodes->at(n));
-
- CopySelector cs;
- if (node->accept(&cs))
- {
- // NOTE not all nodes have infered shape/dtype: multiple outs may not be
- // visited when outputs are not used
- // TODO fix shape inference traversal
- // NOTE when loco supports multiple outputs in nature this issue should be
- // resolved also
-
- if (loco::dtype_known(node))
- {
- node->dtype(loco::dtype_get(node));
- }
-
- if (loco::shape_known(node))
- {
- auto shape = loco::shape_get(node).as<loco::TensorShape>();
- node->rank(shape.rank());
- for (uint32_t r = 0; r < shape.rank(); ++r)
- {
- node->dim(r) = loco::Dimension(shape.dim(r).value());
- }
-
- // ShapeStatus should be update only when the status was UNDEFINED
- if (node->shape_status() == ShapeStatus::UNDEFINED)
- node->shape_status(ShapeStatus::VALID);
- }
- }
- }
-}
-
-} // namespace luci
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TYPE_BRIDGE_H__
-#define __TYPE_BRIDGE_H__
-
-#include <luci/IR/CircleNode.h>
-
-#include <loco.h>
-
-namespace luci
-{
-
-/**
- * @brief node_shape() will return loco::TensorShape of CircleNode
- */
-loco::TensorShape node_shape(CircleNode *node);
-
-/**
- * @brief node_dtype() will return loco::DataType of CircleNode
- */
-loco::DataType node_dtype(CircleNode *node);
-
-/**
- * @brief copy_shape_dtype() will copy shape and dtype inference data to CircleNode
- */
-void copy_shape_dtype(loco::Graph *graph);
-
-} // namespace luci
-
-#endif // __TYPE_BRIDGE_H__
target_include_directories(luci_import PRIVATE src)
target_include_directories(luci_import PUBLIC include)
target_link_libraries(luci_import PUBLIC luci_lang)
+target_link_libraries(luci_import PUBLIC luci_profile)
target_link_libraries(luci_import PUBLIC mio_circle)
target_link_libraries(luci_import PRIVATE luci_env)
target_link_libraries(luci_import PRIVATE luci_log)
#include <luci/IR/AttrPadding.h>
#include <luci/IR/CircleNode.h>
#include <luci/IR/CircleQuantParam.h>
-#include <luci/IR/CircleShapeSignature.h>
#include <luci/IR/SparsityParam.h>
#include <loco.h>
using CircleTensors_t = std::vector<std::unique_ptr<circle::TensorT>>;
using CircleOperators_t = std::vector<std::unique_ptr<circle::OperatorT>>;
using CircleOperatorCodes_t = std::vector<std::unique_ptr<circle::OperatorCodeT>>;
+ using CircleMetadata_t = std::vector<std::unique_ptr<circle::MetadataT>>;
using CircleSubGraphsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
using CircleTensorsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
const std::vector<int32_t> &inputs() const { return _current_subgraph->inputs; }
const std::vector<int32_t> &outputs() const { return _current_subgraph->outputs; }
const std::string &name() const { return _current_subgraph->name; }
+ const circle::DataFormat &data_format() const { return _current_subgraph->data_format; }
+ const CircleMetadata_t &metadata() const { return _model->metadata; }
const CircleTensorsPtr_t *tensors_ptr() const { return _tensors_ptr; }
public:
virtual ~GraphBuilder() = default;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+ // common validate helper that checks the input count and that there is a single output
+ bool validate(const ValidateArgs &args, size_t input_cnt) const
+ {
+ return (args.op.inputs.size() == input_cnt && args.op.outputs.size() == 1);
+ }
+
+ CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
private:
virtual CircleNode *build_node(const circle::OperatorT &op,
#include "GraphBuilderContext.h"
+#include <luci/IR/CircleNode.h>
+
#include <mio/circle/schema_generated.h>
namespace luci
};
virtual bool validate(const ValidateArgs &) const = 0;
- virtual void build(const circle::OperatorT &op, GraphBuilderContext *context) const = 0;
+ virtual CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const = 0;
virtual ~GraphBuilderBase() = default;
};
public:
GraphBuilderContext(loco::Graph *g, CircleReader *reader, IndexNodeFinder *nodefinder,
IndexTensorOutputs *tensoroutputs)
- : _g(g), _reader(reader), _indexnodefinder(nodefinder), _indextensoroutputs(tensoroutputs)
+ : _g(g), _reader(reader), _indexnodefinder(nodefinder), _indextensoroutputs(tensoroutputs)
{
// DO NOTHING
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_GRAPH_BUILDER_MULTI_OUTPUT_H__
+#define __LUCI_IMPORT_GRAPH_BUILDER_MULTI_OUTPUT_H__
+
+#include "GraphBuilderContext.h"
+#include "GraphBuilderBase.h"
+
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Base class for general multiple-output graph builders (e.g., CircleIfGraphBuilder)
+ */
+class GraphBuilderMultiOutput : public GraphBuilderBase
+{
+public:
+ virtual ~GraphBuilderMultiOutput() = default;
+
+ CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+
+protected:
+ struct BuildNodeArgs
+ {
+ BuildNodeArgs(const circle::OperatorT &o, GraphBuilderContext *c,
+ const std::vector<CircleNode *> &i)
+ : op(o), context(c), input_nodes(i)
+ {
+ }
+
+ const circle::OperatorT &op;
+ GraphBuilderContext *context;
+ const std::vector<CircleNode *> &input_nodes;
+ };
+
+ struct BuildOutArgs
+ {
+ BuildOutArgs(CircleNode *nd, uint32_t n) : node(nd), index(n) {}
+
+ CircleNode *node;
+ uint32_t index;
+ };
+
+private:
+ virtual CircleNode *build_node(const BuildNodeArgs &) const = 0;
+ virtual CircleNode *build_out(const BuildOutArgs &) const = 0;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_GRAPH_BUILDER_MULTI_OUTPUT_H__
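With this base class, a multi-output operator importer only supplies build_node and build_out; the shared build() handles input wiring and the per-output virtual nodes. A hypothetical builder for a two-output op might look as follows (CircleFoo/CircleFooOut are illustrative names, and the GraphBuilderContext::graph() and loco::Node::graph() accessors are assumed):

// Hypothetical example mirroring the builders converted in this patch
// (e.g. the Unique or TopKV2 builders). Not part of the change itself.
class CircleFooGraphBuilder : public luci::GraphBuilderMultiOutput
{
public:
  bool validate(const ValidateArgs &args) const final
  {
    return args.op.inputs.size() == 1 && args.op.outputs.size() == 2;
  }

private:
  luci::CircleNode *build_node(const BuildNodeArgs &bna) const final
  {
    auto *node = bna.context->graph()->nodes()->create<luci::CircleFoo>();
    node->input(bna.input_nodes[0]);
    return node;
  }

  luci::CircleNode *build_out(const BuildOutArgs &boa) const final
  {
    auto *out = boa.node->graph()->nodes()->create<luci::CircleFooOut>();
    out->input(boa.node);
    out->index(boa.index);
    return out;
  }
};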
#include "Nodes/CircleBatchToSpaceND.h"
#include "Nodes/CircleBCQFullyConnected.h"
#include "Nodes/CircleBCQGather.h"
+#include "Nodes/CircleBidirectionalSequenceLSTM.h"
#include "Nodes/CircleCast.h"
#include "Nodes/CircleCeil.h"
#include "Nodes/CircleConcatenation.h"
#include "Nodes/CircleEqual.h"
#include "Nodes/CircleExp.h"
#include "Nodes/CircleExpandDims.h"
+#include "Nodes/CircleFakeQuant.h"
#include "Nodes/CircleFill.h"
#include "Nodes/CircleFloor.h"
#include "Nodes/CircleFloorDiv.h"
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_BIDIRECTIONALSEQUENCE_LSTM_H__
+#define __LUCI_IMPORT_OP_CIRCLE_BIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "luci/Import/GraphBuilderMultiOutput.h"
+
+namespace luci
+{
+
+class CircleBidirectionalSequenceLSTMGraphBuilder : public GraphBuilderMultiOutput
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_BIDIRECTIONALSEQUENCE_LSTM_H__
#ifndef __LUCI_IMPORT_OP_CIRCLE_CUSTOM_H__
#define __LUCI_IMPORT_OP_CIRCLE_CUSTOM_H__
-#include "luci/Import/GraphBuilder.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleCustomGraphBuilder : public GraphBuilderBase
+class CircleCustomGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_FAKE_QUANT_H__
+#define __LUCI_IMPORT_OP_CIRCLE_FAKE_QUANT_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleFakeQuantGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_FAKE_QUANT_H__
#ifndef __LUCI_IMPORT_OP_CIRCLE_IF_H__
#define __LUCI_IMPORT_OP_CIRCLE_IF_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleIfGraphBuilder : public GraphBuilderBase
+class CircleIfGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleNonMaxSuppressionV4GraphBuilder : public GraphBuilderBase
+class CircleNonMaxSuppressionV4GraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderBase
+class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_SPLIT_H__
#define __LUCI_IMPORT_OP_CIRCLE_SPLIT_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleSplitGraphBuilder : public GraphBuilderBase
+class CircleSplitGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_SPLIT_V_H__
#define __LUCI_IMPORT_OP_CIRCLE_SPLIT_V_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleSplitVGraphBuilder : public GraphBuilderBase
+class CircleSplitVGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_TOPK_V2_H__
#define __LUCI_IMPORT_OP_CIRCLE_TOPK_V2_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleTopKV2GraphBuilder : public GraphBuilderBase
+class CircleTopKV2GraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
#define __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleUniqueGraphBuilder : public GraphBuilderBase
+class CircleUniqueGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
#ifndef __LUCI_IMPORT_OP_CIRCLE_UNPACK_H__
#define __LUCI_IMPORT_OP_CIRCLE_UNPACK_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleUnpackGraphBuilder : public GraphBuilderBase
+class CircleUnpackGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+ CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
};
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleImportMetadata.h"
+
+#include <vector>
+
+namespace
+{
+
+uint32_t read_u32(const std::vector<uint8_t> &buffer, uint32_t idx)
+{
+ uint32_t val = 0;
+ val += (buffer.at(idx + 0) << 0 * 8);
+ val += (buffer.at(idx + 1) << 1 * 8);
+ val += (buffer.at(idx + 2) << 2 * 8);
+ val += (buffer.at(idx + 3) << 3 * 8);
+ return val;
+}
+
+} // namespace
+
+namespace
+{
+
+// 'source_table' is decoded to std::map<uint32_t, std::string> format.
+const std::map<uint32_t, std::string>
+decoded_source_table(const std::vector<uint8_t> &source_table_data)
+{
+ std::map<uint32_t, std::string> source_id_name_map;
+ uint32_t idx = 0;
+
+ if (source_table_data.size() < 4)
+ throw std::runtime_error("Source table decode error : invalid entry number");
+
+ uint32_t entry_number = read_u32(source_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ while (idx < source_table_data.size())
+ {
+ if (idx + 2 * sizeof(uint32_t) > source_table_data.size())
+ throw std::runtime_error("Source table decode error : invalid entry item");
+
+ uint32_t id = read_u32(source_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ uint32_t length = read_u32(source_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ if (idx + sizeof(char) * length > source_table_data.size())
+ throw std::runtime_error("Source table decode error : invalid entry data");
+
+ // The last character of name is '\0'.
+ // However, as std::string does not use '\0' to find the end of the string,
+ // we ignore the character and do not include it in the string.
+ std::string origin_name;
+ for (uint32_t j = 0; j < length - 1; ++j)
+ origin_name += source_table_data.at(idx + j);
+ assert(source_table_data.at(idx + length - 1) == '\0');
+ idx += sizeof(char) * length;
+
+ if (source_id_name_map.insert({id, origin_name}).second == false)
+ throw std::runtime_error("Source table decode error : duplicated origin ID");
+ }
+
+ if (idx != source_table_data.size())
+ throw std::runtime_error("Source table decode error : data size invalid");
+
+ if (source_id_name_map.size() != entry_number)
+ throw std::runtime_error("Source table decode error : result size mismatch");
+
+ return source_id_name_map;
+}
+
+// 'op_table' is decoded to std::map<uint32_t, std::set<uint32_t>> format.
+const std::map<uint32_t, std::set<uint32_t>>
+decoded_op_table(const std::vector<uint8_t> &op_table_data)
+{
+ std::map<uint32_t, std::set<uint32_t>> node_source_ids_map;
+ uint32_t idx = 0;
+
+ if (op_table_data.size() < 4)
+ throw std::runtime_error("Op table decode error : invalid entry number");
+
+ uint32_t entry_number = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ while (idx < op_table_data.size())
+ {
+ if (idx + 2 * sizeof(uint32_t) > op_table_data.size())
+ throw std::runtime_error("Op table decode error : invalid entry item");
+
+ uint32_t id = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ uint32_t node_num = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ if (idx + sizeof(uint32_t) * node_num > op_table_data.size())
+ throw std::runtime_error("Op table decode error : invalid entry data");
+
+ std::set<uint32_t> source_ids;
+ for (uint32_t j = 0; j < node_num; ++j)
+ {
+ uint32_t origin = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ source_ids.insert(origin);
+ }
+
+ if (node_source_ids_map.insert({id, source_ids}).second == false)
+ throw std::runtime_error("Op table decode error : duplicated origin ID");
+ }
+
+ if (idx != op_table_data.size())
+ throw std::runtime_error("Op table decode error : data size invalid");
+
+ if (node_source_ids_map.size() != entry_number)
+ throw std::runtime_error("Op table decode error : entry number invalid");
+
+ return node_source_ids_map;
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleImportMetadata::CircleImportMetadata(const luci::CircleReader &reader)
+{
+ const auto &metadata = reader.metadata();
+ for (uint32_t i = 0; i < metadata.size(); ++i)
+ {
+ const circle::MetadataT &meta = *metadata[i];
+
+ assert(meta.buffer < reader.buffers().size());
+ const std::vector<uint8_t> &buffer = reader.buffers()[meta.buffer]->data;
+
+ if (meta.name.compare("ONE_op_table") == 0)
+ _op_table = decoded_op_table(buffer);
+ else if (meta.name.compare("ONE_source_table") == 0)
+ _source_table = decoded_source_table(buffer);
+ }
+}
+
+const OriginTable CircleImportMetadata::origin_table(void)
+{
+ OriginTable origin_table;
+
+ if (_op_table.size() > 0 && _source_table.size() > 0)
+ {
+ for (auto &kv : _op_table)
+ {
+ const auto node_id = kv.first;
+ const auto &source_ids = kv.second;
+
+ std::vector<std::shared_ptr<CircleNodeOrigin>> origins;
+ for (auto source_id : source_ids)
+ {
+ const auto source_name = _source_table.at(source_id);
+ origins.push_back(single_origin(source_id, source_name));
+ }
+
+ auto origin = composite_origin(origins);
+ origin_table.emplace(node_id, origin);
+ }
+ }
+
+ return origin_table;
+}
+
+} // namespace luci
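For reference, the byte layout that decoded_source_table() and decoded_op_table() above expect uses little-endian u32 fields: ONE_source_table is [entry_count] followed by repeated [source_id][name_length][name bytes ending in '\0'], and ONE_op_table is [entry_count] followed by repeated [node_id][origin_count][origin_id]*. The sketch below is illustrative only (it is not part of this patch and the helper names are made up); it shows an encoder that produces a buffer the source-table decoder accepts.

#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Append a uint32_t in little-endian order, mirroring read_u32() above.
static void append_u32(std::vector<uint8_t> &buf, uint32_t v)
{
  for (int i = 0; i < 4; ++i)
    buf.push_back(static_cast<uint8_t>((v >> (8 * i)) & 0xFF));
}

// Layout: [entry_count:u32] then, per entry, [source_id:u32][name_len:u32][name bytes + '\0'].
std::vector<uint8_t> encode_source_table(const std::map<uint32_t, std::string> &table)
{
  std::vector<uint8_t> buf;
  append_u32(buf, static_cast<uint32_t>(table.size()));
  for (const auto &kv : table)
  {
    append_u32(buf, kv.first);
    append_u32(buf, static_cast<uint32_t>(kv.second.size() + 1)); // length counts the trailing '\0'
    for (char c : kv.second)
      buf.push_back(static_cast<uint8_t>(c));
    buf.push_back(0);
  }
  return buf;
}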
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_IMPORT_METADATA_H__
+#define __LUCI_CIRCLE_IMPORT_METADATA_H__
+
+#include "luci/Import/CircleReader.h"
+
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <map>
+#include <set>
+#include <string>
+
+namespace luci
+{
+
+using OriginTable = std::map<uint32_t, std::shared_ptr<CircleNodeOrigin>>;
+
+class CircleImportMetadata
+{
+public:
+ CircleImportMetadata() = delete;
+
+ CircleImportMetadata(const luci::CircleReader &reader);
+
+public:
+ /**
+ * @brief Create origin table using _source_table and _op_table in CircleImportMetadata
+ * @note To create the origin table, both _op_table and _source_table must exist.
+ * If either is missing, an empty table is returned.
+ */
+ const OriginTable origin_table(void);
+
+private:
+ // Decoded metadata is stored
+ std::map<uint32_t, std::string> _source_table;
+ std::map<uint32_t, std::set<uint32_t>> _op_table;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_IMPORT_METADATA_H__
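A minimal usage sketch of the new class, assuming a valid luci::CircleReader is at hand (illustrative only; the free function below is not part of this patch):

#include "CircleImportMetadata.h"

#include <cstdint>
#include <memory>

// Build the origin table once and look up the origin recorded for a given operator index.
void lookup_origin_example(const luci::CircleReader &reader)
{
  luci::CircleImportMetadata metadata(reader);
  const luci::OriginTable origins = metadata.origin_table(); // empty if either table is missing

  const uint32_t node_id = 0; // illustrative operator index
  auto it = origins.find(node_id);
  if (it != origins.end())
  {
    std::shared_ptr<luci::CircleNodeOrigin> origin = it->second;
    // The Importer.cpp hunk below attaches this via add_origin(built_op, origin).
    (void)origin;
  }
}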
case circle::SparseIndexVector_Int32Vector:
{
const auto const_vec_ptr =
- static_cast<const void *>(&(sparse_index_vector.AsInt32Vector()->values));
+ static_cast<const void *>(&(sparse_index_vector.AsInt32Vector()->values));
return SparseIndexVector{SparseIndexVectorType::I32, const_vec_ptr};
}
case circle::SparseIndexVector_Uint16Vector:
{
const auto const_vec_ptr =
- static_cast<const void *>(&(sparse_index_vector.AsUint16Vector()->values));
+ static_cast<const void *>(&(sparse_index_vector.AsUint16Vector()->values));
return SparseIndexVector{SparseIndexVectorType::U16, const_vec_ptr};
}
case circle::SparseIndexVector_Uint8Vector:
{
const auto const_vec_ptr =
- static_cast<const void *>(&(sparse_index_vector.AsUint8Vector()->values));
+ static_cast<const void *>(&(sparse_index_vector.AsUint8Vector()->values));
return SparseIndexVector{SparseIndexVectorType::U8, const_vec_ptr};
}
default:
node->name(tensor_name(tensor));
node->dtype(luci_datatype(tensor.type));
+ assert(tensor.shape_signature.size() == 0 ||
+ tensor.shape_signature.size() == tensor.shape.size());
+
std::vector<int32_t> dims = tensor.shape; // in NHWC
node->rank(dims.size());
for (uint32_t r = 0; r < dims.size(); ++r)
{
- node->dim(r) = loco::Dimension(dims[r]);
+ if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
+ node->dim(r).unset();
+ else
+ node->dim(r).set(dims[r]);
}
- node->shape_signature(tensor.shape_signature);
-
const auto *quantization = tensor.quantization.get();
if (quantization != nullptr)
{
namespace luci
{
-void GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
{
LOGGER(l);
else
{
// If there is no tensor, insert CircleOutputExclude.
- input_nodes.push_back(context->graph()->nodes()->create<luci::CircleOutputExclude>());
+ auto *node = context->graph()->nodes()->create<luci::CircleOutputExclude>();
+ // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
+ // a dummy type is inserted.
+ node->dtype(loco::DataType::FLOAT32);
+ input_nodes.push_back(node);
}
}
{
context->nodefinder()->enroll(outputs[0], node);
}
+
+ return node;
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/GraphBuilderMultiOutput.h"
+
+#include <luci/Log.h>
+
+namespace luci
+{
+
+CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
+{
+ LOGGER(l);
+
+ assert(context != nullptr);
+
+ const std::vector<int32_t> &inputs = op.inputs;
+ const std::vector<int32_t> &outputs = op.outputs;
+ const auto &tensors = context->reader()->tensors();
+ const auto &opcodes = context->reader()->opcodes();
+ auto tensors_ptr = context->reader()->tensors_ptr();
+ assert(tensors_ptr != nullptr);
+
+ std::vector<CircleNode *> input_nodes;
+ for (const int32_t input_tensor_index : inputs)
+ {
+ if (input_tensor_index >= 0)
+ {
+ auto input = context->nodefinder()->node(input_tensor_index);
+ if (input == nullptr)
+ INFO(l) << "[luci] Warning: input node is null " << input_tensor_index << std::endl;
+ input_nodes.push_back(input);
+ }
+ else
+ {
+ // If there is no tensor, insert CircleOutputExclude.
+ auto *node = context->graph()->nodes()->create<luci::CircleOutputExclude>();
+ // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
+ // a dummy type is inserted.
+ node->dtype(loco::DataType::FLOAT32);
+ input_nodes.push_back(node);
+ }
+ }
+
+ BuildNodeArgs bna(op, context, input_nodes);
+ auto *node = build_node(bna);
+
+ uint32_t output_count = outputs.size();
+ assert(output_count > 0);
+ {
+ // Let's use attributes from output 0 for this node
+ const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ node->name(tensor_name(output_tensor));
+ node->dtype(luci_datatype(output_tensor.type));
+
+ // mark operator version
+ node->op_version(opcodes[op.opcode_index].get()->version);
+
+ // NOTE We don't set quantization on the multi-output node itself but on its virtual outputs
+ }
+
+ // Create a virtual output node for each output of this operator
+ for (uint32_t n = 0; n < output_count; ++n)
+ {
+ const circle::TensorT &output_tensor = *tensors[outputs[n]];
+
+ BuildOutArgs boa(node, n);
+ auto *nodeout = build_out(boa);
+
+ copy_tensor_attributes(output_tensor, nodeout);
+ // mark shape_status
+ if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+ nodeout->shape_status(ShapeStatus::NOSHAPE);
+ else
+ nodeout->shape_status(ShapeStatus::VALID);
+
+ context->nodefinder()->enroll(outputs[n], nodeout);
+ }
+
+ return node;
+}
+
+} // namespace luci
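Under the new base class, a multi-output builder only supplies build_node() and build_out(); the common input resolution, naming, op_version and shape_status handling above is shared. A minimal derived builder looks roughly like the following sketch (CircleFoo and CircleFooOut are placeholder names, not types introduced by this patch):

class CircleFooGraphBuilder : public GraphBuilderMultiOutput
{
public:
  bool validate(const ValidateArgs &args) const final
  {
    // e.g. one input, two outputs
    return args.op.inputs.size() == 1 && args.op.outputs.size() == 2;
  }

private:
  // Create the operator node itself from the already-resolved input nodes.
  CircleNode *build_node(const BuildNodeArgs &bna) const final
  {
    auto *node = bna.context->graph()->nodes()->create<CircleFoo>();
    node->input(bna.input_nodes.at(0));
    return node;
  }

  // Create the index-th virtual output; the base class copies tensor attributes onto it.
  CircleNode *build_out(const BuildOutArgs &boa) const final
  {
    auto *nodeout = boa.node->graph()->nodes()->create<CircleFooOut>();
    nodeout->input(boa.node);
    nodeout->index(boa.index);
    return nodeout;
  }
};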
CIRCLE_NODE(BATCH_TO_SPACE_ND, CircleBatchToSpaceNDGraphBuilder); // 37
CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnectedGraphBuilder); // 253
CIRCLE_NODE(BCQ_GATHER, CircleBCQGatherGraphBuilder); // 252
+ CIRCLE_NODE(BIDIRECTIONAL_SEQUENCE_LSTM, CircleBidirectionalSequenceLSTMGraphBuilder); // 52
CIRCLE_NODE(CAST, CircleCastGraphBuilder); // 53
CIRCLE_NODE(CEIL, CircleCeilGraphBuilder); // 104
CIRCLE_NODE(CUSTOM, CircleCustomGraphBuilder); // 32
CIRCLE_NODE(EQUAL, CircleEqualGraphBuilder); // 71
CIRCLE_NODE(EXP, CircleExpGraphBuilder); // 47
CIRCLE_NODE(EXPAND_DIMS, CircleExpandDimsGraphBuilder); // 70
+ CIRCLE_NODE(FAKE_QUANT, CircleFakeQuantGraphBuilder); // 80
CIRCLE_NODE(FILL, CircleFillGraphBuilder); // 94
CIRCLE_NODE(FLOOR, CircleFloorGraphBuilder); // 8
CIRCLE_NODE(FLOOR_DIV, CircleFloorDivGraphBuilder); // 90
// BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
// BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46,
// BuiltinOperator_DELEGATE = 51,
- // BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
// BuiltinOperator_ARG_MAX = 56,
- // BuiltinOperator_FAKE_QUANT = 80,
// BuiltinOperator_QUANTIZE = 114,
// BuiltinOperator_HARD_SWISH = 117,
// BuiltinOperator_DENSIFY = 124,
*/
#include "luci/Importer.h"
+#include "CircleImportMetadata.h"
#include "PostImport.h"
#include "luci/Import/GraphBuilder.h"
#include <luci/IR/Module.h>
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeID.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Log.h>
#include <luci/LogHelper.h>
const auto &tensors = reader.tensors();
auto tensors_ptr = reader.tensors_ptr();
assert(tensors_ptr != nullptr);
+ auto circle_metadata = std::make_unique<luci::CircleImportMetadata>(reader);
// build a cache to identify if a tensor is output of an operator
// if this is set, we should not create a CircleConst for this tensor
// Data type
graph_input->dtype(input_node->dtype());
+ assert(tensor.shape_signature.size() == 0 ||
+ tensor.shape_signature.size() == tensor.shape.size());
+
// Shape of GraphInput
auto input_shape = std::make_unique<loco::TensorShape>();
const std::vector<int32_t> &input_dims = tensor.shape; // in NHWC
input_shape->rank(input_dims.size());
for (uint32_t r = 0; r < input_dims.size(); ++r)
- input_shape->dim(r) = loco::Dimension(input_dims[r]);
+ {
+ if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
+ input_shape->dim(r).unset();
+ else
+ input_shape->dim(r).set(input_dims[r]);
+ }
graph_input->shape(std::move(input_shape));
}
// Note that operators in model are stored in execution order. This means that when importing
// an operator, its input operators have already been imported. We exploit this fact to set up
// node's inputs right after creating the node.
+ auto origin_table = circle_metadata->origin_table();
for (uint32_t i = 0; i < operators.size(); ++i)
{
const circle::OperatorT &op = *operators[i];
throw oops::UserExn("Invalid operator", reader.opcode_name(op));
}
- builder->build(op, &gb_context);
+ auto built_op = builder->build(op, &gb_context);
+ set_node_id(built_op, i);
+ if (origin_table.find(i) != origin_table.end())
+ add_origin(built_op, origin_table.at(i));
+ else
+ add_origin(built_op, luci::single_origin(i, built_op->name()));
}
else
{
// set the graph output name and node object
auto graph_output = graph->outputs()->create();
std::string tname = luci::tensor_name(tensor);
- graph_output->name("output_" + tname);
+ assert(tname.length() > 0);
+ graph_output->name(tname);
luci::copy_tensor_attributes(tensor, output_node);
// Set GraphInputOutputIndex for graph
output_node->index(graph_output->index());
+ assert(tensor.shape_signature.size() == 0 ||
+ tensor.shape_signature.size() == tensor.shape.size());
+
// Shape of Output
auto output_shape = std::make_unique<loco::TensorShape>();
const std::vector<int32_t> &output_dims = tensor.shape; // in NHWC
output_shape->rank(output_dims.size());
for (uint32_t r = 0; r < output_dims.size(); ++r)
- output_shape->dim(r) = loco::Dimension(output_dims[r]);
+ {
+ if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
+ output_shape->dim(r).unset();
+ else
+ output_shape->dim(r).set(output_dims[r]);
+ }
graph_output->shape(std::move(output_shape));
// Data type
{
bool CircleAbsGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO Support type check
- return true;
+ return GraphBuilder::validate(args, 1);
}
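The repeated input/output size checks in the validate() hunks here are folded into GraphBuilder::validate(args, n). Its definition is not shown in this section; presumably it is equivalent to the following sketch (n inputs, exactly one output):

bool GraphBuilder::validate(const ValidateArgs &args, size_t input_cnt) const
{
  return args.op.inputs.size() == input_cnt && args.op.outputs.size() == 1;
}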
CircleNode *CircleAbsGraphBuilder::build_node(const circle::OperatorT &,
bool CircleAddGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleAddGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleArgMaxGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleArgMaxGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleArgMinGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleArgMinGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleAveragePool2DGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleAveragePool2DGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleBCQFullyConnectedGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 5)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 5);
}
CircleNode *CircleBCQFullyConnectedGraphBuilder::build_node(const circle::OperatorT &op,
node->bias(inputs.at(3));
node->weights_clusters(inputs.at(4));
- // TODO Find and move to appropriate place for setting optional input
- if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
- {
- // bias is not used for type inference, but node itself should have a type
- bias->dtype(loco::DataType::FLOAT32);
-
- // bias is not used for shape inference
- }
-
const auto *options = op.builtin_options.AsBCQFullyConnectedOptions();
node->weights_hidden_size(options->weights_hidden_size);
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
bool CircleBCQGatherGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 4)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 4);
}
CircleNode *CircleBCQGatherGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleBatchMatMulGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleBatchMatMulGraphBuilder::build_node(const circle::OperatorT &op,
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleBidirectionalSequenceLSTM.h"
+
+#include <luci/IR/Nodes/CircleBidirectionalSequenceLSTM.h>
+#include <luci/IR/Nodes/CircleBidirectionalSequenceLSTMOut.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleBidirectionalSequenceLSTMGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 48)
+ return false;
+ if (args.op.outputs.size() != 2)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleBidirectionalSequenceLSTMGraphBuilder::build_node(const BuildNodeArgs &bna) const
+{
+ auto *node = bna.context->graph()->nodes()->create<CircleBidirectionalSequenceLSTM>();
+ auto &inputs = bna.input_nodes;
+ node->input(inputs.at(0));
+ node->fw_input_to_input_weights(inputs.at(1)); // Optional
+ node->fw_input_to_cell_weights(inputs.at(2));
+ node->fw_input_to_forget_weights(inputs.at(3));
+ node->fw_input_to_output_weights(inputs.at(4));
+ node->fw_recurrent_to_input_weights(inputs.at(5)); // Optional
+ node->fw_recurrent_to_cell_weights(inputs.at(6));
+ node->fw_recurrent_to_forget_weights(inputs.at(7));
+ node->fw_recurrent_to_output_weights(inputs.at(8));
+ node->fw_cell_to_input_weights(inputs.at(9)); // Optional
+ node->fw_cell_to_forget_weights(inputs.at(10)); // Optional
+ node->fw_cell_to_output_weights(inputs.at(11)); // Optional
+ node->fw_input_gate_bias(inputs.at(12)); // Optional
+ node->fw_forget_gate_bias(inputs.at(13));
+ node->fw_cell_gate_bias(inputs.at(14));
+ node->fw_output_gate_bias(inputs.at(15));
+ node->fw_projection_weights(inputs.at(16)); // Optional
+ node->fw_projection_bias(inputs.at(17)); // Optional
+ node->bw_input_to_input_weights(inputs.at(18)); // Optional
+ node->bw_input_to_cell_weights(inputs.at(19));
+ node->bw_input_to_forget_weights(inputs.at(20));
+ node->bw_input_to_output_weights(inputs.at(21));
+ node->bw_recurrent_to_input_weights(inputs.at(22)); // Optional
+ node->bw_recurrent_to_cell_weights(inputs.at(23));
+ node->bw_recurrent_to_forget_weights(inputs.at(24));
+ node->bw_recurrent_to_output_weights(inputs.at(25));
+ node->bw_cell_to_input_weights(inputs.at(26)); // Optional
+ node->bw_cell_to_forget_weights(inputs.at(27)); // Optional
+ node->bw_cell_to_output_weights(inputs.at(28)); // Optional
+ node->bw_input_gate_bias(inputs.at(29)); // Optional
+ node->bw_forget_gate_bias(inputs.at(30));
+ node->bw_cell_gate_bias(inputs.at(31));
+ node->bw_output_gate_bias(inputs.at(32));
+ node->bw_projection_weights(inputs.at(33)); // Optional
+ node->bw_projection_bias(inputs.at(34)); // Optional
+ node->fw_activation_state(inputs.at(35));
+ node->fw_cell_state(inputs.at(36));
+ node->bw_activation_state(inputs.at(37));
+ node->bw_cell_state(inputs.at(38));
+
+ node->auxillary_input(inputs.at(39)); // Optional
+ node->fw_auxillary_input_to_input_weights(inputs.at(40)); // Optional
+ node->fw_auxillary_input_to_forget_weights(inputs.at(41)); // Optional
+ node->fw_auxillary_input_to_cell_weights(inputs.at(42)); // Optional
+ node->fw_auxillary_input_to_output_weights(inputs.at(43)); // Optional
+ node->bw_auxillary_input_to_input_weights(inputs.at(44)); // Optional
+ node->bw_auxillary_input_to_forget_weights(inputs.at(45)); // Optional
+ node->bw_auxillary_input_to_cell_weights(inputs.at(46)); // Optional
+ node->bw_auxillary_input_to_output_weights(inputs.at(47)); // Optional
+
+ const auto *options = bna.op.builtin_options.AsBidirectionalSequenceLSTMOptions();
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+ node->cell_clip(options->cell_clip);
+ node->proj_clip(options->proj_clip);
+ node->merge_outputs(options->merge_outputs);
+ node->time_major(options->time_major);
+ node->asymmetric_quantize_inputs(options->asymmetric_quantize_inputs);
+
+ return node;
+}
+
+CircleNode *CircleBidirectionalSequenceLSTMGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleBidirectionalSequenceLSTMOut>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
+}
+
+} // namespace luci
{
LOGGER(l);
+ if (!GraphBuilder::validate(args, 1))
+ return false;
+
auto settings = luci::UserSettings::settings();
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
// NOTE real models do have type mismatch
const auto *options = args.op.builtin_options.AsCastOptions();
bool CircleCeilGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
-
// TODO dtype check
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleCeilGraphBuilder::build_node(const circle::OperatorT &,
bool CircleConv2DGraphBuilder::validate(const ValidateArgs &args) const
{
// Circle Conv2D may not have a bias but we won't support this
- if (args.op.inputs.size() != 3)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleConv2DGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleCosGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleCosGraphBuilder::build_node(const circle::OperatorT &,
return true;
}
-void CircleCustomGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleCustomGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ uint32_t input_count = bna.op.inputs.size();
+ uint32_t output_count = bna.op.outputs.size();
- auto graph = context->graph();
+ auto *node = bna.context->graph()->nodes()->create<CircleCustom>(input_count, output_count);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ for (uint32_t idx = 0; idx < input_count; ++idx)
+ {
+ node->inputs(idx, bna.input_nodes[idx]);
+ }
- // Create CircleCustom
- const auto &opcodes = context->reader()->opcodes();
- const uint32_t opcode_index = op.opcode_index;
+ const auto &opcodes = bna.context->reader()->opcodes();
+ const uint32_t opcode_index = bna.op.opcode_index;
const circle::OperatorCodeT &opcode = *opcodes[opcode_index];
- auto *node = graph->nodes()->create<CircleCustom>(inputs.size());
- uint32_t input_idx = 0;
- for (const int32_t input_tensor_index : inputs)
- {
- node->inputs(input_idx++, context->nodefinder()->node(input_tensor_index));
- }
- node->custom_options(std::vector<uint8_t>{op.custom_options.begin(), op.custom_options.end()});
+ node->custom_options(
+ std::vector<uint8_t>{bna.op.custom_options.begin(), bna.op.custom_options.end()});
node->custom_code(opcode.custom_code);
- // Operator version of custom is always 1, so do nothing
- uint32_t output_count = outputs.size();
+ // NOTE Operator version of custom is always 1
- assert(output_count > 0);
- {
- // Let's use attributes from output 0 for this node
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->dtype(luci_datatype(output_tensor.type));
- }
-
- // Create virtual outputs of Custom
- for (uint32_t n = 0; n < output_count; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleCustomOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleCustomGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleCustomOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
bool CircleDepthToSpaceGraphBuilder::validate(const ValidateArgs &args) const
{
+ if (!GraphBuilder::validate(args, 1))
+ return false;
+
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
const auto *options = args.op.builtin_options.AsDepthToSpaceOptions();
-
- if (inputs.size() != 1)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
const auto &tensors = args.reader.tensors();
if (tensors[outputs[0]]->type != tensors[inputs.at(0)]->type)
if (args.op.outputs.size() != 1)
return false;
+ const auto &tensors = args.reader.tensors();
+
+ // input shape
+ const auto &input = tensors.at(args.op.inputs.at(0));
+ const auto &input_shape = input->shape;
+
+ // input shape must be rank 4
+ if (input_shape.size() != 4)
+ return false;
+
+ // filter shape
+ const auto &filter = tensors.at(args.op.inputs.at(1));
+ const auto &filter_shape = filter->shape;
+
+ // filter shape must be rank 4
+ if (filter_shape.size() != 4)
+ return false;
+
+ // multiplier
+ const auto *options = args.op.builtin_options.AsDepthwiseConv2DOptions();
+ const auto &multiplier = options->depth_multiplier;
+
+ // The filter is represented as [1, H, W, C*M], where M is the depth multiplier.
+ if (filter_shape.at(3) != input_shape.at(3) * multiplier)
+ return false;
+
return true;
}
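As a concrete instance of the new depthwise check: an input of shape [1, H, W, 8] with depth_multiplier 2 requires a filter of shape [1, kH, kW, 16]. A tiny standalone sketch of the same arithmetic (names are illustrative, not part of this patch):

#include <cstdint>

// Mirrors the channel check added above: filter channels must equal input channels * multiplier.
bool depthwise_filter_channels_ok(int32_t input_channels, int32_t multiplier, int32_t filter_channels)
{
  return filter_channels == input_channels * multiplier;
}

// depthwise_filter_channels_ok(8, 2, 16) -> true
// depthwise_filter_channels_ok(8, 2, 12) -> false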
bool CircleDequantizeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleDequantizeGraphBuilder::build_node(const circle::OperatorT &,
bool CircleDivGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleDivGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleEluGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
- if (outputs.size() != 1)
- return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
return tensors[inputs.at(0)]->type == tensors[inputs.at(1)]->type;
bool CircleExpGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// input type check
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleExpandDimsGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
return tensors[inputs.at(1)]->type == circle::TensorType_INT32;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleFakeQuant.h"
+
+#include <luci/IR/Nodes/CircleFullyConnected.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleFakeQuantGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 1);
+}
+
+CircleNode *CircleFakeQuantGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleFakeQuant>();
+ node->inputs(inputs.at(0));
+
+ const auto *options = op.builtin_options.AsFakeQuantOptions();
+ node->min(options->min);
+ node->max(options->max);
+ node->num_bits(options->num_bits);
+ node->narrow_range(options->narrow_range);
+
+ return node;
+}
+
+} // namespace luci
bool CircleFillGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleFillGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleFloorGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
-
// TODO dtype check
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleFloorGraphBuilder::build_node(const circle::OperatorT &,
bool CircleFloorDivGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_in_0 = tensors.at(inputs.at(0));
const auto &tensor_in_1 = tensors.at(inputs.at(1));
bool CircleFloorModGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_in_0 = tensors.at(inputs.at(0));
const auto &tensor_in_1 = tensors.at(inputs.at(1));
bool CircleFullyConnectedGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleFullyConnectedGraphBuilder::build_node(const circle::OperatorT &op,
node->weights(inputs.at(1));
node->bias(inputs.at(2)); // bias is optional
- // TODO Find and move to appropriate place for setting optional input
- if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
- {
- // bias is not used for type inference, but node itself should have a type
- bias->dtype(loco::DataType::FLOAT32);
-
- // bias is not used for shape inference
- }
-
const auto *options = op.builtin_options.AsFullyConnectedOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
node->weights_format(luci_weights_format(options->weights_format));
bool CircleGatherGraphBuilder::validate(const ValidateArgs &args) const
{
+ if (!GraphBuilder::validate(args, 2))
+ return false;
+
const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
const auto *options = args.op.builtin_options.AsGatherOptions();
int32_t axis = options->axis;
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
if (axis < 0)
axis += inputs.size();
bool CircleGatherNdGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
auto &indices_tensor = args.reader.tensors()[inputs.at(1)];
if (!(indices_tensor->type == circle::TensorType::TensorType_INT32 ||
{
LOGGER(l);
+ if (!GraphBuilder::validate(args, 2))
+ return false;
+
auto settings = luci::UserSettings::settings();
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
const auto &tensors = args.reader.tensors();
if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
bool CircleGreaterEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
* \- CircleIfOut --- Node ---
*/
-void CircleIfGraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *CircleIfGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ uint32_t input_count = bna.op.inputs.size() - 1;
+ uint32_t output_count = bna.op.outputs.size();
- auto graph = context->graph();
+ auto *node = bna.context->graph()->nodes()->create<CircleIf>(input_count, output_count);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- uint32_t input_count = inputs.size() - 1;
- uint32_t output_count = outputs.size();
-
- // Create CircleIf
- CircleIf *node = graph->nodes()->create<CircleIf>(input_count, output_count);
-
- node->cond(input_nodes[0]);
+ node->cond(bna.input_nodes[0]);
for (uint32_t idx = 0; idx < input_count; ++idx)
{
- node->input(idx, input_nodes[idx + 1]);
+ node->input(idx, bna.input_nodes[idx + 1]);
}
- const auto *options = op.builtin_options.AsIfOptions();
+ const auto *options = bna.op.builtin_options.AsIfOptions();
node->then_branch(options->then_subgraph_index);
node->else_branch(options->else_subgraph_index);
- assert(outputs.size() > 0);
- {
- // Lets use name of output 0 as If name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for If itself but to virtual outputs
- }
-
- // Create virtual outputs of If
- for (uint32_t n = 0; n < output_count; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleIfOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleIfGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleIfOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
bool CircleInstanceNormGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
// TODO check dtypes
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleInstanceNormGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleL2NormalizeGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
- {
- return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleL2NormalizeGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleL2Pool2DGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO check dtypes
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleL2Pool2DGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleLeakyReluGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleLeakyReluGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleLessGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleLessEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
bool CircleLocalResponseNormalizationGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleLocalResponseNormalizationGraphBuilder::build_node(
- const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
+ const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLocalResponseNormalization>();
node->input(inputs.at(0));
bool CircleLogGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- if (args.op.outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// input type check
// Must be one of bfloat16, half, float32, float64, complex64, complex128.
// Currently circle supports half(float16), float32, float64, complex64.
bool CircleLogSoftmaxGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleLogSoftmaxGraphBuilder::build_node(const circle::OperatorT &,
bool CircleLogicalAndGraphBuilder::validate(const ValidateArgs &args) const
{
- // Only BOOL type is allowed for inputs
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 2)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ // Only BOOL type is allowed for inputs
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
for (auto input : inputs)
{
bool CircleLogicalNotGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
// Only BOOL type is allowed for the input
bool CircleLogicalOrGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
+ if (!GraphBuilder::validate(args, 2))
return false;
// Only BOOL type is allowed for inputs
bool CircleLogisticGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- const auto &outputs = args.op.outputs;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
return false;
bool CircleMatrixDiagGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleMatrixSetDiagGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleMaxPool2DGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleMaxPool2DGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleMeanGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleMeanGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleMirrorPadGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
// TODO check others
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleMirrorPadGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleMulGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleMulGraphBuilder::build_node(const circle::OperatorT &op,
{
bool CircleNegGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO Support type check
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleNegGraphBuilder::build_node(const circle::OperatorT &,
* We will create multiple NonMaxSuppressionV4Out nodes to emulate this
*/
-void CircleNonMaxSuppressionV4GraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleNonMaxSuppressionV4GraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleNonMaxSuppressionV4
- auto node = graph->nodes()->create<CircleNonMaxSuppressionV4>();
- node->boxes(input_nodes[0]);
- node->scores(input_nodes[1]);
- node->max_output_size(input_nodes[2]);
- node->iou_threshold(input_nodes[3]);
- node->score_threshold(input_nodes[4]);
-
- assert(outputs.size() == 2);
- {
- // Let's use name of output 0 as NonMaxSuppressionV4 name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for NonMaxSuppressionV4 itself but to virtual outputs
- }
-
- // Create virtual outputs of NonMaxSuppressionV4
- for (size_t n = 0; n < outputs.size(); ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV4Out>();
- copy_tensor_attributes(output_tensor, nodeout);
-
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ auto node = bna.context->graph()->nodes()->create<CircleNonMaxSuppressionV4>();
+
+ node->boxes(bna.input_nodes[0]);
+ node->scores(bna.input_nodes[1]);
+ node->max_output_size(bna.input_nodes[2]);
+ node->iou_threshold(bna.input_nodes[3]);
+ node->score_threshold(bna.input_nodes[4]);
+
+ return node;
+}
+
+CircleNode *CircleNonMaxSuppressionV4GraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleNonMaxSuppressionV4Out>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
* We will create multiple NonMaxSuppressionV5Out nodes to emulate this
*/
-void CircleNonMaxSuppressionV5GraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleNonMaxSuppressionV5GraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleNonMaxSuppressionV5
- auto node = graph->nodes()->create<CircleNonMaxSuppressionV5>();
- node->boxes(input_nodes[0]);
- node->scores(input_nodes[1]);
- node->max_output_size(input_nodes[2]);
- node->iou_threshold(input_nodes[3]);
- node->score_threshold(input_nodes[4]);
- node->soft_nms_sigma(input_nodes[5]);
-
- assert(outputs.size() == 3);
- {
- // Let's use name of output 0 as NonMaxSuppressionV5 name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for NonMaxSuppressionV5 itself but to virtual outputs
- }
-
- // Create virtual outputs of NonMaxSuppressionV5
- for (size_t n = 0; n < outputs.size(); ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV5Out>();
- copy_tensor_attributes(output_tensor, nodeout);
-
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ auto node = bna.context->graph()->nodes()->create<CircleNonMaxSuppressionV5>();
+
+ node->boxes(bna.input_nodes[0]);
+ node->scores(bna.input_nodes[1]);
+ node->max_output_size(bna.input_nodes[2]);
+ node->iou_threshold(bna.input_nodes[3]);
+ node->score_threshold(bna.input_nodes[4]);
+ node->soft_nms_sigma(bna.input_nodes[5]);
+
+ return node;
+}
+
+CircleNode *CircleNonMaxSuppressionV5GraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleNonMaxSuppressionV5Out>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
bool CircleNotEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
bool CircleOneHotGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- const auto *options = args.op.builtin_options.AsOneHotOptions();
-
// Only 4 inputs are expected
- if (inputs.size() != 4)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 4))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto *options = args.op.builtin_options.AsOneHotOptions();
const auto &tensors = args.reader.tensors();
const auto &indices = tensors.at(inputs.at(0));
const auto &depth = tensors.at(inputs.at(1));
bool CirclePReluGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CirclePReluGraphBuilder::build_node(const circle::OperatorT &,
bool CirclePadGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CirclePadGraphBuilder::build_node(const circle::OperatorT &op,
bool CirclePadV2GraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CirclePadV2GraphBuilder::build_node(const circle::OperatorT &op,
bool CirclePowGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CirclePowGraphBuilder::build_node(const circle::OperatorT &,
{
bool CircleRangeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
// TODO Support type check
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleRangeGraphBuilder::build_node(const circle::OperatorT &,
{
bool CircleRankGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleRankGraphBuilder::build_node(const circle::OperatorT &,
bool CircleReduceAnyGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_0 = tensors.at(inputs.at(0));
const auto &tensor_1 = tensors.at(inputs.at(1));
bool CircleReduceProdGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 2)
- return false;
- if (args.op.outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_1 = tensors.at(inputs.at(1));
bool CircleReluGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleReluGraphBuilder::build_node(const circle::OperatorT &,
bool CircleRelu6GraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleRelu6GraphBuilder::build_node(const circle::OperatorT &,
bool CircleReluN1To1GraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
// TODO check dtypes
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleReluN1To1GraphBuilder::build_node(const circle::OperatorT &,
if (args.op.outputs.size() != 1)
return false;
+ // for two inputs, check if type is S32
+ if (args.op.inputs.size() == 2)
+ {
+ const auto &inputs = args.op.inputs;
+ const auto &tensors = args.reader.tensors();
+ const auto &tensor_in = tensors.at(inputs.at(1));
+
+ // NOTE Fix this if any other case appears;
+ // TensorFlow Lite and Circle only support S32 here.
+ if (tensor_in->type != circle::TensorType::TensorType_INT32)
+ return false;
+ }
+
return true;
}
{
shape_node->at<loco::DataType::S32>(i) = shape[i];
}
+ shape_node->name("Reshape/shape");
return shape_node;
}
shape_node = graph->nodes()->create<CircleOutputDummy>();
shape_node->dtype(loco::DataType::S32);
shape_node->rank(0);
+ shape_node->name("Reshape/dummy");
}
}
#include "luci/Import/Nodes/CircleResizeBilinear.h"
-#include <luci/IR/Nodes/CircleConst.h>
#include <luci/IR/Nodes/CircleResizeBilinear.h>
namespace luci
bool CircleResizeBilinearGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleResizeBilinearGraphBuilder::build_node(const circle::OperatorT &op,
#include "luci/Import/Nodes/CircleResizeNearestNeighbor.h"
-#include <luci/IR/Nodes/CircleConst.h>
#include <luci/IR/Nodes/CircleResizeNearestNeighbor.h>
namespace luci
bool CircleResizeNearestNeighborGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleResizeNearestNeighborGraphBuilder::build_node(
- const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
+ const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleResizeNearestNeighbor>();
node->input(inputs.at(0));
bool CircleReverseSequenceGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_in = tensors.at(inputs.at(0));
const auto &tensor_lengths = tensors.at(inputs.at(1));
bool CircleReverseV2GraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_in = tensors.at(inputs.at(0));
const auto &tensor_axis = tensors.at(inputs.at(1));
bool CircleRoundGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
// Must be one of the following types
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
bool CircleRsqrtGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// Must be one of the following types
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
+ case circle::TensorType_UINT8:
+ case circle::TensorType_INT16:
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
case circle::TensorType_COMPLEX64:
bool CircleScatterNdGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 3)
+ if (!GraphBuilder::validate(args, 3))
return false;
+ const auto &inputs = args.op.inputs;
// indices must have the same type as shape
const auto &tensors = args.reader.tensors();
bool CircleSegmentSumGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_in = tensors.at(inputs.at(0));
const auto &tensor_out = tensors.at(outputs[0]);
bool CircleSelectGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 3)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 3))
return false;
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
if (tensor->type != circle::TensorType_BOOL)
bool CircleSelectV2GraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 3)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 3))
return false;
+ const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
const auto &condition = tensors.at(inputs.at(0));
if (condition->type != circle::TensorType_BOOL)
bool CircleShapeGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
-
// TODO check shape, dtype
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleShapeGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleSinGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- if (args.op.outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// input type check
const auto &tensors = args.reader.tensors();
const auto &tensor = tensors.at(inputs.at(0));
bool CircleSliceGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
- if (args.op.outputs.size() != 1)
- return false;
-
// TODO check shapes and types
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleSliceGraphBuilder::build_node(const circle::OperatorT &,
bool CircleSoftmaxGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSoftmaxGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleSpaceToDepthGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSpaceToDepthGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleSparseToDenseGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 4)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 4);
}
CircleNode *CircleSparseToDenseGraphBuilder::build_node(const circle::OperatorT &op,
* \- CircleSplitOut --- FullyConnected ---
*/
-void CircleSplitGraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *CircleSplitGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ auto node = bna.context->graph()->nodes()->create<CircleSplit>();
- auto graph = context->graph();
+ node->split_dim(bna.input_nodes[0]);
+ node->input(bna.input_nodes[1]);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto *options = bna.op.builtin_options.AsSplitOptions();
+ node->num_split(options->num_splits);
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
+ return node;
+}
- // Create CircleSplit
- auto node = graph->nodes()->create<CircleSplit>();
- node->split_dim(input_nodes[0]);
- node->input(input_nodes[1]);
+CircleNode *CircleSplitGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleSplitOut>();
- const auto *options = op.builtin_options.AsSplitOptions();
- node->num_split(options->num_splits);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- assert(outputs.size() > 0);
- assert(int32_t(outputs.size()) == options->num_splits);
- {
- // Let's use name of output 0 as Split name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for Split itself but to virtual outputs
- }
-
- // Create virtual outputs of Split
- for (int32_t n = 0; n < options->num_splits; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleSplitOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
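
The Split, SplitV, TopKV2, Unique and Unpack builders in this change now implement only build_node() and build_out(); the boilerplate they used to repeat (collecting input nodes, naming the node after output 0, copying tensor attributes, marking shape status, enrolling the virtual outputs) presumably moves into a shared multi-output base builder. A rough sketch of that driver, with the base-class name and the argument-struct construction as assumptions:

// Hypothetical sketch of the shared driver behind the build_node()/build_out()
// split. The class name (GraphBuilderMultiOutput here) and the argument-struct
// initialization are assumptions; the steps mirror the boilerplate removed
// from the per-operator builders.
CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
                                           GraphBuilderContext *context) const
{
  assert(context != nullptr);

  const auto &tensors = context->reader()->tensors();
  const auto &opcodes = context->reader()->opcodes();
  auto tensors_ptr = context->reader()->tensors_ptr();

  std::vector<CircleNode *> input_nodes;
  for (const int32_t input_tensor_index : op.inputs)
    input_nodes.push_back(context->nodefinder()->node(input_tensor_index));

  BuildNodeArgs bna{op, context, input_nodes};
  auto *node = build_node(bna);

  // Use the name of output 0 for the "mother" node; quantization stays on the virtual outputs.
  const auto &outputs = op.outputs;
  node->name(tensor_name(*tensors[outputs[0]]));
  node->op_version(opcodes[op.opcode_index].get()->version);

  // Create one virtual output per model output and enroll it with the node finder.
  for (uint32_t n = 0; n < outputs.size(); ++n)
  {
    BuildOutArgs boa{node, n};
    auto *nodeout = build_out(boa);

    copy_tensor_attributes(*tensors[outputs[n]], nodeout);
    nodeout->shape_status(tensors_ptr->Get(outputs[n])->shape() == nullptr ? ShapeStatus::NOSHAPE
                                                                           : ShapeStatus::VALID);
    context->nodefinder()->enroll(outputs[n], nodeout);
  }
  return node;
}
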
* \- CircleSplitVOut --- FullyConnected ---
*/
-void CircleSplitVGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleSplitVGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleSplitV
- auto node = graph->nodes()->create<CircleSplitV>();
- node->input(input_nodes[0]);
- node->size_splits(input_nodes[1]);
- node->split_dim(input_nodes[2]);
-
- const auto *options = op.builtin_options.AsSplitVOptions();
+ auto node = bna.context->graph()->nodes()->create<CircleSplitV>();
+
+ node->input(bna.input_nodes[0]);
+ node->size_splits(bna.input_nodes[1]);
+ node->split_dim(bna.input_nodes[2]);
+
+ const auto *options = bna.op.builtin_options.AsSplitVOptions();
node->num_split(options->num_splits);
- assert(outputs.size() > 0);
- assert(int32_t(outputs.size()) == options->num_splits);
- {
- // Let's use name of output 0 as Split name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for Split itself but to virtual outputs
- }
-
- // Create virtual outputs of Split
- for (int32_t n = 0; n < options->num_splits; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleSplitVOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ assert(int32_t(bna.op.outputs.size()) == options->num_splits);
+
+ return node;
+}
+
+CircleNode *CircleSplitVGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleSplitVOut>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
bool CircleSqrtGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSqrtGraphBuilder::build_node(const circle::OperatorT &,
bool CircleSquareGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// Must be one of the following types
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
bool CircleSquaredDifferenceGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
// Inputs must be one of the following types
// bfloat16, half(float16), float32, float64, int32, int64, complex64, complex128
const auto &tensors = args.reader.tensors();
#include "luci/Import/Nodes/CircleSqueeze.h"
-#include <luci/IR/Nodes/CircleConst.h>
#include <luci/IR/Nodes/CircleSqueeze.h>
namespace luci
bool CircleSqueezeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSqueezeGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleStridedSliceGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 4)
- return false;
- if (args.op.outputs.size() != 1)
- return false;
-
// TODO check shapes and types
-
- return true;
+ return GraphBuilder::validate(args, 4);
}
CircleNode *CircleStridedSliceGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleSubGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleSubGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleSumGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleSumGraphBuilder::build_node(const circle::OperatorT &op,
bool CircleTanhGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- const auto &outputs = args.op.outputs;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
return false;
bool CircleTileGraphBuilder::validate(const ValidateArgs &args) const
{
- auto inputs = args.op.inputs;
- auto outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ auto inputs = args.op.inputs;
+ auto outputs = args.op.outputs;
// Multiples (inputs.at(1)) must be one of the following types
// int32, int64
const auto &tensors = args.reader.tensors();
* \- CircleTopKV2Out --- FullyConnected ---
*/
-void CircleTopKV2GraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleTopKV2GraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleTopKV2
- auto node = graph->nodes()->create<CircleTopKV2>();
- node->input(input_nodes[0]);
- node->k(input_nodes[1]);
-
- assert(outputs.size() == 2);
- {
- // Let's use name of output 0 as TopKV2 name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for TopKV2 itself but to virtual outputs
- }
-
- // Create virtual outputs of TopKV2
- for (size_t n = 0; n < outputs.size(); ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleTopKV2Out>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ auto node = bna.context->graph()->nodes()->create<CircleTopKV2>();
+
+ node->input(bna.input_nodes[0]);
+ node->k(bna.input_nodes[1]);
+
+ return node;
+}
+
+CircleNode *CircleTopKV2GraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleTopKV2Out>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
bool CircleTransposeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleTransposeGraphBuilder::build_node(const circle::OperatorT &op,
node->filter(inputs.at(1));
node->outBackprop(inputs.at(2));
if (inputs.size() == 3)
- node->bias(graph->nodes()->create<CircleOutputExclude>());
- else
- node->bias(inputs.at(3));
-
- if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
{
- // CircleOutputExclude doesn't need a type, but since all nodes must have a type, a dummy type
- // is inserted.
+ auto *bias = graph->nodes()->create<CircleOutputExclude>();
+ // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
+ // a dummy type is inserted.
bias->dtype(loco::DataType::FLOAT32);
+ node->bias(bias);
}
+ else
+ node->bias(inputs.at(3));
const auto *options = op.builtin_options.AsTransposeConvOptions();
node->padding(luci_padding(options->padding));
bool CircleUnidirectionalSequenceLSTMGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 24)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 24);
}
CircleNode *CircleUnidirectionalSequenceLSTMGraphBuilder::build_node(
- const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
+ const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleUnidirectionalSequenceLSTM>();
node->input(inputs.at(0));
node->forget_layer_norm_coefficients(inputs.at(21)); // Optional
node->cell_layer_norm_coefficients(inputs.at(22)); // Optional
node->output_layer_norm_coefficients(inputs.at(23)); // Optional
- const std::vector<int32_t> optionals = {1, 5, 9, 10, 11, 12, 16, 17, 20, 21, 22, 23};
- for (auto optional : optionals)
- {
- if (auto inp = dynamic_cast<luci::CircleOutputExclude *>(node->arg(optional)))
- {
- // CircleOutputExclude doesn't need a type, but since all nodes must have a type, a dummy type
- // is inserted.
- inp->dtype(loco::DataType::FLOAT32);
- }
- }
const auto *options = op.builtin_options.AsUnidirectionalSequenceLSTMOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
return true;
}
-void CircleUniqueGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleUniqueGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ auto node = bna.context->graph()->nodes()->create<CircleUnique>();
- auto graph = context->graph();
+ node->input(bna.input_nodes[0]);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto *options = bna.op.builtin_options.AsUniqueOptions();
+ node->idx_out_type(luci_datatype(options->idx_out_type));
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleUnique
- auto node = graph->nodes()->create<CircleUnique>();
- node->input(input_nodes[0]);
-
- const auto *options = op.builtin_options.AsUniqueOptions();
- node->output_type(luci_datatype(options->idx_out_type));
-
- assert(int32_t(outputs.size()) == 2);
- // Let's use name of output 0 as Unique name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
-
- // Create virtual outputs of Unique
- for (int32_t n = 0; n < 2; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleUniqueOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleUniqueGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleUniqueOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
* \- CircleUnpackOut --- FullyConnected ---
*/
-void CircleUnpackGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleUnpackGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ auto node = bna.context->graph()->nodes()->create<CircleUnpack>();
- auto graph = context->graph();
+ node->value(bna.input_nodes[0]);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- // NOTE Unpack has only one input so running a loop is not necessary
- // This is provided as a reference for other Ops as a reference
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleUnpack
- CircleUnpack *node = graph->nodes()->create<CircleUnpack>();
- node->value(input_nodes[0]);
-
- const auto *options = op.builtin_options.AsUnpackOptions();
+ const auto *options = bna.op.builtin_options.AsUnpackOptions();
node->num(options->num);
node->axis(options->axis);
- assert(outputs.size() > 0);
- {
- // Let's use name of output 0 as Unpack name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for Unpack itself but to virtual outputs
- }
-
- // Create virtual outputs of Unpack
- for (int32_t n = 0; n < options->num; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleUnpackOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleUnpackGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleUnpackOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
bool CircleWhereGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
const auto &tensors = args.reader.tensors();
const auto &tensor_condition = tensors.at(inputs.at(0));
const auto &tensor_out = tensors.at(outputs[0]);
* \- CircleWhileOut --- Node ---
*/
-void CircleWhileGraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
{
assert(context != nullptr);
context->nodefinder()->enroll(outputs[n], nodeout);
}
+
+ return node;
}
} // namespace luci
bool CircleZerosLikeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleZerosLikeGraphBuilder::build_node(const circle::OperatorT &,
namespace
{
/**
- * @brief ValidateNodeProp will validate inter graph connections for each Nodes
+ * @brief ValidateNodeProp validates inter-graph connections for each node.
+ * @note Only loco::GraphInput and loco::GraphOutput are validated here,
+ *       since this class checks inter-graph connections.
+ *       CircleNodes such as CircleInput and CircleOutput are validated in later steps.
*/
class ValidateNodeProp final : public luci::CircleNodeMutableVisitor<void>
{
auto then_graph_output = then_graph_outputs->at(then_out->index());
auto else_graph_output = else_graph_outputs->at(else_out->index());
- if (!(*then_graph_output->shape() == *else_graph_output->shape()))
+ if (then_graph_output->shape()->rank() != else_graph_output->shape()->rank())
{
- INTERNAL_EXN_V("CircleIf THEN and ELSE Graph Output shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleIf THEN and ELSE Graph Output rank mismatch ", idx);
+ }
+ for (uint32_t i = 0; i < then_graph_output->shape()->rank(); ++i)
+ {
+ if (then_graph_output->shape()->dim(i).known() &&
+ else_graph_output->shape()->dim(i).known() &&
+ then_graph_output->shape()->dim(i).value() !=
+ else_graph_output->shape()->dim(i).value())
+ {
+ INTERNAL_EXN_V("CircleIf THEN and ELSE Graph Output dimension mismatch ", idx);
+ }
}
if (then_graph_output->dtype() != else_graph_output->dtype())
{
auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
auto body_graph_input = body_graph_inputs->at(body_in->index());
- if ((cond_in->rank() != body_in->rank()))
+ if (cond_graph_input->shape()->rank() != body_graph_input->shape()->rank())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY input shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY input rank mismatch ", idx);
}
- if (cond_in->rank() > 0 && body_in->rank() > 0)
+ for (uint32_t i = 0; i < cond_graph_input->shape()->rank(); ++i)
{
- if (!(*cond_graph_input->shape() == *body_graph_input->shape()))
+ if (cond_graph_input->shape()->dim(i).known() &&
+ body_graph_input->shape()->dim(i).known() &&
+ cond_graph_input->shape()->dim(i).value() != body_graph_input->shape()->dim(i).value())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY input shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY input dimension mismatch ", idx);
}
}
- if (cond_in->dtype() != body_in->dtype())
+ if (cond_graph_input->dtype() != body_graph_input->dtype())
{
INTERNAL_EXN_V("CircleWhile COND input and BODY input type mismatch ", idx);
}
auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
auto body_graph_output = body_graph_outputs->at(body_out->index());
- if ((cond_in->rank() != body_out->rank()))
+ if (cond_graph_input->shape()->rank() != body_graph_output->shape()->rank())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY output shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY output rank mismatch ", idx);
}
- if (cond_in->rank() > 0 && body_out->rank() > 0)
+ for (uint32_t i = 0; i < cond_graph_input->shape()->rank(); ++i)
{
- if (!(*cond_graph_input->shape() == *body_graph_output->shape()))
+ if (cond_graph_input->shape()->dim(i).known() &&
+ body_graph_output->shape()->dim(i).known() &&
+ cond_graph_input->shape()->dim(i).value() != body_graph_output->shape()->dim(i).value())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY output shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY output dimension mismatch ", idx);
}
}
- if (cond_in->dtype() != body_out->dtype())
+ if (cond_graph_input->dtype() != body_graph_output->dtype())
{
INTERNAL_EXN_V("CircleWhile COND input and BODY output type mismatch ", idx);
}
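
The CircleIf and CircleWhile checks above replace a strict shape equality with a rank check plus a per-dimension comparison that skips unknown dimensions. A small hypothetical helper (not part of this change) condenses the pattern:

// Hypothetical helper: shapes are compatible when ranks match and every pair
// of *known* dimensions agrees; unknown (dynamic) dimensions act as wildcards.
bool compatible_shapes(const loco::TensorShape &a, const loco::TensorShape &b)
{
  if (a.rank() != b.rank())
    return false;
  for (uint32_t i = 0; i < a.rank(); ++i)
  {
    if (a.dim(i).known() && b.dim(i).known() && a.dim(i).value() != b.dim(i).value())
      return false;
  }
  return true;
}
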
target_include_directories(luci_lang PUBLIC include)
target_link_libraries(luci_lang PUBLIC loco)
target_link_libraries(luci_lang PUBLIC oops)
+target_link_libraries(luci_lang PUBLIC nncc_coverage)
target_link_libraries(luci_lang PRIVATE logo)
target_link_libraries(luci_lang PRIVATE nncc_common)
#include <loco/IR/Dialect.h>
#include <loco/IR/Node.h>
#include <loco/IR/NodeMixins.h>
-#include <luci/IR/CircleShapeSignature.h>
#include <luci/IR/PropertyShapeStatus.h>
#include "CircleOpcode.h"
_sparsityparam = std::move(sparsityparam);
}
- const ShapeSignature &shape_signature(void) const { return _shape_signature; }
- void shape_signature(const ShapeSignature &ss) { _shape_signature = ss; }
-
ShapeStatus shape_status(void) const { return _shape_status; }
void shape_status(ShapeStatus ss) { _shape_status = ss; }
NodeName _name;
std::unique_ptr<CircleQuantParam> _quantparam;
std::unique_ptr<SparsityParam> _sparsityparam;
- ShapeSignature _shape_signature;
ShapeStatus _shape_status{ShapeStatus::UNDEFINED};
int32_t _op_version = 1;
};
\
case CircleOpcode::OPCODE: \
return v->visit(dynamic_cast<const CLASS *>(this));
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
default:
\
case CircleOpcode::OPCODE: \
return v->visit(dynamic_cast<CLASS *>(this));
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
default:
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NODE_MIXINS_H__
+#define __LUCI_IR_CIRCLE_NODE_MIXINS_H__
+
+#include "luci/IR/AttrFusedActFunc.h"
+
+#include <loco/IR/Node.h>
+#include <loco/IR/NodeMixins.h>
+
+#include <vector>
+
+namespace luci
+{
+
+/// @brief Enumeration of mixin classes
+enum class CircleNodeTrait
+{
+ FusedActFunc,
+ Bias
+};
+
+template <CircleNodeTrait T> class CircleNodeMixin;
+
+template <> class CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ CircleNodeMixin() = default;
+
+public:
+ FusedActFunc fusedActivationFunction() const { return _fused_act_fun; }
+ void fusedActivationFunction(FusedActFunc fused_act_fun) { _fused_act_fun = fused_act_fun; }
+
+private:
+ FusedActFunc _fused_act_fun = FusedActFunc::UNDEFINED;
+};
+
+/**
+ * @brief Mixin class for nodes that have a bias input
+ */
+template <> class CircleNodeMixin<CircleNodeTrait::Bias>
+{
+public:
+ CircleNodeMixin() = default;
+
+public:
+  virtual loco::Node *bias(void) const = 0; ///< @brief Get the input for bias.
+  virtual void bias(loco::Node *node) = 0;  ///< @brief Set the input for bias.
+};
+
+/**
+ * @brief Nodes with a fixed number of inputs
+ *
+ * TODO Deprecate this class and use loco::FixedArity instead
+ */
+template <unsigned N, typename Base> class FixedArityNode : public Base
+{
+public:
+ FixedArityNode()
+ {
+ _args.resize(N);
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args[n] = std::make_unique<loco::Use>(this);
+ }
+ }
+
+ virtual ~FixedArityNode() = default;
+
+public:
+ unsigned arity(void) const final { return N; }
+
+ loco::Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
+
+ void drop(void) final
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args.at(n)->node(nullptr);
+ }
+ }
+
+protected:
+ // This API allows inherited classes to access "_args" field.
+ loco::Use *at(unsigned n) const { return _args.at(n).get(); }
+
+private:
+ std::vector<std::unique_ptr<loco::Use>> _args{};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NODE_MIXINS_H__
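
For reference, this is how a concrete node composes the pieces declared above: FixedArityNode fixes the input count and the FusedActFunc mixin contributes the activation attribute. The snippet abridges CircleAdd, which this change updates further down.

// Abridged from CircleAdd (see the Nodes/CircleAdd.h hunk later in this diff).
class CircleAdd final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::ADD>>,
                        public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
  loco::Node *x(void) const { return at(0)->node(); }
  void x(loco::Node *node) { at(0)->node(node); }

  loco::Node *y(void) const { return at(1)->node(); }
  void y(loco::Node *node) { at(1)->node(node); }
};
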
virtual ~CircleNodeVisitorBase() = default;
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) virtual T visit(const CIRCLE_CLASS *) = 0;
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
};
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
virtual T visit(const CIRCLE_CLASS *node) { return visit(static_cast<const CircleNode *>(node)); }
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
/// @brief Default fallback
virtual ~CircleNodeMutableVisitorBase() = default;
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) virtual T visit(CIRCLE_CLASS *) = 0;
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
};
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
virtual T visit(CIRCLE_CLASS *node) { return visit(static_cast<CircleNode *>(node)); }
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
/// @brief Default fallback
#include "Nodes/CircleAveragePool2D.h"
#include "Nodes/CircleBatchMatMul.h"
#include "Nodes/CircleBatchToSpaceND.h"
+#include "Nodes/CircleBidirectionalSequenceLSTM.h"
#include "Nodes/CircleCast.h"
#include "Nodes/CircleCeil.h"
#include "Nodes/CircleConcatenation.h"
#include "Nodes/CircleEqual.h"
#include "Nodes/CircleExp.h"
#include "Nodes/CircleExpandDims.h"
+#include "Nodes/CircleFakeQuant.h"
#include "Nodes/CircleFill.h"
#include "Nodes/CircleFloor.h"
#include "Nodes/CircleFloorDiv.h"
// Virtual nodes
#include "Nodes/CircleInput.h"
#include "Nodes/CircleOutput.h"
+#include "Nodes/CircleBidirectionalSequenceLSTMOut.h"
#include "Nodes/CircleCustomOut.h"
#include "Nodes/CircleIfOut.h"
#include "Nodes/CircleNonMaxSuppressionV4Out.h"
namespace luci
{
-/**
- * @brief Set both CircleReshape's 2nd input as CircleConst, and newShape attribute
- * with same value
- * @note Shape inference for TFLReshape forces them to be same
- *
- * TODO find better place for this helper
- */
-void set_new_shape(CircleReshape *node, int32_t *base, uint32_t size);
-
/// @brief Link GraphOutput with CircleOutput node
void link(loco::GraphOutput *, CircleOutput *);
#error "Define CIRCLE_NODE"
#endif // CIRCLE_NODE
+#ifndef CIRCLE_VNODE
+#error "Define CIRCLE_VNODE"
+#endif // CIRCLE_VNODE
+
//
// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
//
CIRCLE_NODE(ARG_MIN, luci::CircleArgMin)
CIRCLE_NODE(AVERAGE_POOL_2D, luci::CircleAveragePool2D)
CIRCLE_NODE(BATCH_TO_SPACE_ND, luci::CircleBatchToSpaceND)
-CIRCLE_NODE(BATCHMATMUL, luci::CircleBatchMatMul)
+CIRCLE_NODE(BATCH_MATMUL, luci::CircleBatchMatMul)
+CIRCLE_NODE(BIDIRECTIONAL_SEQUENCE_LSTM, luci::CircleBidirectionalSequenceLSTM)
CIRCLE_NODE(CAST, luci::CircleCast)
CIRCLE_NODE(CEIL, luci::CircleCeil)
CIRCLE_NODE(CONCATENATION, luci::CircleConcatenation)
CIRCLE_NODE(EQUAL, luci::CircleEqual)
CIRCLE_NODE(EXP, luci::CircleExp)
CIRCLE_NODE(EXPAND_DIMS, luci::CircleExpandDims)
+CIRCLE_NODE(FAKE_QUANT, luci::CircleFakeQuant)
CIRCLE_NODE(FILL, luci::CircleFill)
CIRCLE_NODE(FLOOR, luci::CircleFloor)
CIRCLE_NODE(FLOOR_DIV, luci::CircleFloorDiv)
CIRCLE_NODE(BCQ_GATHER, luci::CircleBCQGather)
CIRCLE_NODE(INSTANCE_NORM, luci::CircleInstanceNorm)
// Virtual node(s)
-CIRCLE_NODE(CIRCLECONST, luci::CircleConst)
-CIRCLE_NODE(CIRCLEINPUT, luci::CircleInput)
-CIRCLE_NODE(CIRCLEOUTPUT, luci::CircleOutput)
-CIRCLE_NODE(CIRCLEOUTPUTDUMMY, luci::CircleOutputDummy)
-CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude)
-CIRCLE_NODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
-CIRCLE_NODE(CIRCLEIFOUT, luci::CircleIfOut)
-CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
-CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, luci::CircleNonMaxSuppressionV5Out)
-CIRCLE_NODE(CIRCLESPLITOUT, luci::CircleSplitOut)
-CIRCLE_NODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
-CIRCLE_NODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
-CIRCLE_NODE(CIRCLEUNIQUEOUT, luci::CircleUniqueOut)
-CIRCLE_NODE(CIRCLEUNPACKOUT, luci::CircleUnpackOut)
-CIRCLE_NODE(CIRCLEWHILEOUT, luci::CircleWhileOut)
+CIRCLE_VNODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT, luci::CircleBidirectionalSequenceLSTMOut)
+CIRCLE_VNODE(CIRCLECONST, luci::CircleConst)
+CIRCLE_VNODE(CIRCLEINPUT, luci::CircleInput)
+CIRCLE_VNODE(CIRCLEOUTPUT, luci::CircleOutput)
+CIRCLE_VNODE(CIRCLEOUTPUTDUMMY, luci::CircleOutputDummy)
+CIRCLE_VNODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude)
+CIRCLE_VNODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
+CIRCLE_VNODE(CIRCLEIFOUT, luci::CircleIfOut)
+CIRCLE_VNODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
+CIRCLE_VNODE(CIRCLENONMAXSUPPRESSIONV5OUT, luci::CircleNonMaxSuppressionV5Out)
+CIRCLE_VNODE(CIRCLESPLITOUT, luci::CircleSplitOut)
+CIRCLE_VNODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
+CIRCLE_VNODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
+CIRCLE_VNODE(CIRCLEUNIQUEOUT, luci::CircleUniqueOut)
+CIRCLE_VNODE(CIRCLEUNPACKOUT, luci::CircleUnpackOut)
+CIRCLE_VNODE(CIRCLEWHILEOUT, luci::CircleWhileOut)
enum class CircleOpcode
{
#define CIRCLE_NODE(OPCODE, CLASS) OPCODE,
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
};
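
CircleNodes.lst now guards both macros, so every consumer must define CIRCLE_VNODE before including it; defining it as CIRCLE_NODE, as in the hunks above, preserves the old single-macro expansion. A hypothetical consumer that treats virtual nodes differently could look like this:

// Hypothetical X-macro consumer: count only the virtual node opcodes.
// The #error guard added to CircleNodes.lst forces both macros to be defined.
uint32_t count_virtual_opcodes(void)
{
  uint32_t count = 0;
#define CIRCLE_NODE(OPCODE, CLASS) // real operators are ignored here
#define CIRCLE_VNODE(OPCODE, CLASS) ++count;
#include "CircleNodes.lst"
#undef CIRCLE_VNODE
#undef CIRCLE_NODE
  return count;
}
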
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_IR_SHAPE_SIGNATURE_H__
-#define __LUCI_IR_SHAPE_SIGNATURE_H__
-
-#include <stdint.h>
-#include <vector>
-
-namespace luci
-{
-
-class ShapeSignature
-{
-public:
- ShapeSignature() = default;
-
- ShapeSignature(const std::vector<int32_t> &shape_signature)
- {
- _shape_signature = shape_signature;
- }
-
-public:
- const std::vector<int32_t> &as_vector() const { return _shape_signature; }
-
- int32_t dim(uint32_t d) const { return _shape_signature.at(d); }
- int32_t &dim(uint32_t d) { return _shape_signature.at(d); }
-
- uint32_t rank(void) const { return _shape_signature.size(); }
- void rank(uint32_t rank) { _shape_signature.resize(rank); }
-
-private:
- std::vector<int32_t> _shape_signature{};
-};
-
-bool operator==(const ShapeSignature &lhs, const ShapeSignature &rhs);
-
-} // namespace luci
-
-#endif // __LUCI_IR_SHAPE_SIGNATURE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LANG_DEADNODEQUERYSERVICE_H__
+#define __LUCI_LANG_DEADNODEQUERYSERVICE_H__
+
+#include <logo/DeadNodeQueryService.h>
+
+#include <loco/IR/Node.h>
+
+namespace luci
+{
+
+struct DeadNodeQueryServiceImpl final : public logo::DeadNodeQueryService
+{
+ bool isDeadNode(loco::Node *node) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_LANG_DEADNODEQUERYSERVICE_H__
#ifndef __LUCI_IR_LUCINODEMIXINS_H__
#define __LUCI_IR_LUCINODEMIXINS_H__
-#include "luci/IR/AttrFusedActFunc.h"
+// TODO remove this file after LuciNodeTrait and LuciNodeMixin are not used in backend
-#include <loco/IR/Node.h>
-#include <loco/IR/NodeMixins.h>
-
-#include <vector>
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
-/// @brief enumeration of mixin class
-enum class LuciNodeTrait
-{
- FusedActFunc,
- Bias
-};
-
-template <LuciNodeTrait T> class LuciNodeMixin;
-
-template <> class LuciNodeMixin<LuciNodeTrait::FusedActFunc>
-{
-public:
- LuciNodeMixin() = default;
-
-public:
- FusedActFunc fusedActivationFunction() const { return _fused_act_fun; }
- void fusedActivationFunction(FusedActFunc fused_act_fun) { _fused_act_fun = fused_act_fun; }
-
-private:
- FusedActFunc _fused_act_fun = FusedActFunc::UNDEFINED;
-};
-
-/**
- * @brief Mixin class for nodes that has a bias input
- */
-template <> class LuciNodeMixin<LuciNodeTrait::Bias>
-{
-public:
- LuciNodeMixin() = default;
-
-public:
- virtual loco::Node *bias(void) const = 0; /// @brief get the input for bias.
- virtual void bias(loco::Node *node) = 0; /// @brief set the input for bias.
-};
-
-/**
- * @brief Nodes with the fixed number of inputs
- *
- * TODO Deprecated this class, and use loco::FixedArity instead
- */
-template <unsigned N, typename Base> class FixedArityNode : public Base
-{
-public:
- FixedArityNode()
- {
- _args.resize(N);
- for (uint32_t n = 0; n < N; ++n)
- {
- _args[n] = std::make_unique<loco::Use>(this);
- }
- }
-
- virtual ~FixedArityNode() = default;
-
-public:
- unsigned arity(void) const final { return N; }
-
- loco::Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
-
- void drop(void) final
- {
- for (uint32_t n = 0; n < N; ++n)
- {
- _args.at(n)->node(nullptr);
- }
- }
-
-protected:
- // This API allows inherited classes to access "_args" field.
- loco::Use *at(unsigned n) const { return _args.at(n).get(); }
+using LuciNodeTrait = CircleNodeTrait;
-private:
- std::vector<std::unique_ptr<loco::Use>> _args{};
-};
+template <LuciNodeTrait T> using LuciNodeMixin = CircleNodeMixin<T>;
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief ADD in Circle
*/
class CircleAdd final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::ADD>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief AVERAGE_POOL_2D in Circle
*/
class CircleAveragePool2D final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::AVERAGE_POOL_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::AVERAGE_POOL_2D>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
-public:
- CircleAveragePool2D() : _padding(Padding::UNDEFINED) { /* empty */}
-
public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
+public:
Padding padding() const { return _padding; }
void padding(Padding padding) { _padding = padding; }
Stride *stride(void) { return &_stride; }
private:
- Padding _padding;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Filter _filter;
};
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief BCQ_FULLY_CONNECTED in Circle
*/
class CircleBCQFullyConnected final
- : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::BCQ_FULLY_CONNECTED>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::BCQ_FULLY_CONNECTED>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
}
private:
- int32_t _weights_hidden_size = 0;
+ int32_t _weights_hidden_size{0};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void input_hidden_size(int32_t input_hidden_size) { _input_hidden_size = input_hidden_size; }
private:
- int32_t _axis = 0;
- int32_t _input_hidden_size = 0;
+ int32_t _axis{0};
+ int32_t _input_hidden_size{0};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
/**
- * @brief BATCHMATMUL in Circle
+ * @brief BATCH_MATMUL in Circle
*/
-class CircleBatchMatMul final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::BATCHMATMUL>>
+class CircleBatchMatMul final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::BATCH_MATMUL>>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
void adj_y(bool arg) { _adj_y = arg; }
private:
- bool _adj_x = false;
- bool _adj_y = false;
+ bool _adj_x{false};
+ bool _adj_y{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief BATCH_TO_SPACE_ND in Circle
*/
class CircleBatchToSpaceND final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::BATCH_TO_SPACE_ND>>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::BATCH_TO_SPACE_ND>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEBIDIRECTIONALSEQUENCE_LSTM_H__
+#define __LUCI_IR_CIRCLEBIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief BIDIRECTIONAL_SEQUENCE_LSTM in Circle
+ */
+class CircleBidirectionalSequenceLSTM final
+ : public FixedArityNode<48, CircleNodeImpl<CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *fw_input_to_input_weights(void) const { return at(1)->node(); }
+ void fw_input_to_input_weights(loco::Node *node) { at(1)->node(node); }
+ loco::Node *fw_input_to_forget_weights(void) const { return at(2)->node(); }
+ void fw_input_to_forget_weights(loco::Node *node) { at(2)->node(node); }
+ loco::Node *fw_input_to_cell_weights(void) const { return at(3)->node(); }
+ void fw_input_to_cell_weights(loco::Node *node) { at(3)->node(node); }
+ loco::Node *fw_input_to_output_weights(void) const { return at(4)->node(); }
+ void fw_input_to_output_weights(loco::Node *node) { at(4)->node(node); }
+
+ loco::Node *fw_recurrent_to_input_weights(void) const { return at(5)->node(); }
+ void fw_recurrent_to_input_weights(loco::Node *node) { at(5)->node(node); }
+ loco::Node *fw_recurrent_to_forget_weights(void) const { return at(6)->node(); }
+ void fw_recurrent_to_forget_weights(loco::Node *node) { at(6)->node(node); }
+ loco::Node *fw_recurrent_to_cell_weights(void) const { return at(7)->node(); }
+ void fw_recurrent_to_cell_weights(loco::Node *node) { at(7)->node(node); }
+ loco::Node *fw_recurrent_to_output_weights(void) const { return at(8)->node(); }
+ void fw_recurrent_to_output_weights(loco::Node *node) { at(8)->node(node); }
+
+ loco::Node *fw_cell_to_input_weights(void) const { return at(9)->node(); }
+ void fw_cell_to_input_weights(loco::Node *node) { at(9)->node(node); }
+ loco::Node *fw_cell_to_forget_weights(void) const { return at(10)->node(); }
+ void fw_cell_to_forget_weights(loco::Node *node) { at(10)->node(node); }
+ loco::Node *fw_cell_to_output_weights(void) const { return at(11)->node(); }
+ void fw_cell_to_output_weights(loco::Node *node) { at(11)->node(node); }
+
+ loco::Node *fw_input_gate_bias(void) const { return at(12)->node(); }
+ void fw_input_gate_bias(loco::Node *node) { at(12)->node(node); }
+ loco::Node *fw_forget_gate_bias(void) const { return at(13)->node(); }
+ void fw_forget_gate_bias(loco::Node *node) { at(13)->node(node); }
+ loco::Node *fw_cell_gate_bias(void) const { return at(14)->node(); }
+ void fw_cell_gate_bias(loco::Node *node) { at(14)->node(node); }
+ loco::Node *fw_output_gate_bias(void) const { return at(15)->node(); }
+ void fw_output_gate_bias(loco::Node *node) { at(15)->node(node); }
+
+ loco::Node *fw_projection_weights(void) const { return at(16)->node(); }
+ void fw_projection_weights(loco::Node *node) { at(16)->node(node); }
+ loco::Node *fw_projection_bias(void) const { return at(17)->node(); }
+ void fw_projection_bias(loco::Node *node) { at(17)->node(node); }
+
+ loco::Node *bw_input_to_input_weights(void) const { return at(18)->node(); }
+ void bw_input_to_input_weights(loco::Node *node) { at(18)->node(node); }
+ loco::Node *bw_input_to_forget_weights(void) const { return at(19)->node(); }
+ void bw_input_to_forget_weights(loco::Node *node) { at(19)->node(node); }
+ loco::Node *bw_input_to_cell_weights(void) const { return at(20)->node(); }
+ void bw_input_to_cell_weights(loco::Node *node) { at(20)->node(node); }
+ loco::Node *bw_input_to_output_weights(void) const { return at(21)->node(); }
+ void bw_input_to_output_weights(loco::Node *node) { at(21)->node(node); }
+
+ loco::Node *bw_recurrent_to_input_weights(void) const { return at(22)->node(); }
+ void bw_recurrent_to_input_weights(loco::Node *node) { at(22)->node(node); }
+ loco::Node *bw_recurrent_to_forget_weights(void) const { return at(23)->node(); }
+ void bw_recurrent_to_forget_weights(loco::Node *node) { at(23)->node(node); }
+ loco::Node *bw_recurrent_to_cell_weights(void) const { return at(24)->node(); }
+ void bw_recurrent_to_cell_weights(loco::Node *node) { at(24)->node(node); }
+ loco::Node *bw_recurrent_to_output_weights(void) const { return at(25)->node(); }
+ void bw_recurrent_to_output_weights(loco::Node *node) { at(25)->node(node); }
+
+ loco::Node *bw_cell_to_input_weights(void) const { return at(26)->node(); }
+ void bw_cell_to_input_weights(loco::Node *node) { at(26)->node(node); }
+ loco::Node *bw_cell_to_forget_weights(void) const { return at(27)->node(); }
+ void bw_cell_to_forget_weights(loco::Node *node) { at(27)->node(node); }
+ loco::Node *bw_cell_to_output_weights(void) const { return at(28)->node(); }
+ void bw_cell_to_output_weights(loco::Node *node) { at(28)->node(node); }
+
+ loco::Node *bw_input_gate_bias(void) const { return at(29)->node(); }
+ void bw_input_gate_bias(loco::Node *node) { at(29)->node(node); }
+ loco::Node *bw_forget_gate_bias(void) const { return at(30)->node(); }
+ void bw_forget_gate_bias(loco::Node *node) { at(30)->node(node); }
+ loco::Node *bw_cell_gate_bias(void) const { return at(31)->node(); }
+ void bw_cell_gate_bias(loco::Node *node) { at(31)->node(node); }
+ loco::Node *bw_output_gate_bias(void) const { return at(32)->node(); }
+ void bw_output_gate_bias(loco::Node *node) { at(32)->node(node); }
+
+ loco::Node *bw_projection_weights(void) const { return at(33)->node(); }
+ void bw_projection_weights(loco::Node *node) { at(33)->node(node); }
+ loco::Node *bw_projection_bias(void) const { return at(34)->node(); }
+ void bw_projection_bias(loco::Node *node) { at(34)->node(node); }
+
+ loco::Node *fw_activation_state(void) const { return at(35)->node(); }
+ void fw_activation_state(loco::Node *node) { at(35)->node(node); }
+ loco::Node *fw_cell_state(void) const { return at(36)->node(); }
+ void fw_cell_state(loco::Node *node) { at(36)->node(node); }
+
+ loco::Node *bw_activation_state(void) const { return at(37)->node(); }
+ void bw_activation_state(loco::Node *node) { at(37)->node(node); }
+ loco::Node *bw_cell_state(void) const { return at(38)->node(); }
+ void bw_cell_state(loco::Node *node) { at(38)->node(node); }
+
+ loco::Node *auxillary_input(void) const { return at(39)->node(); }
+ void auxillary_input(loco::Node *node) { at(39)->node(node); }
+ loco::Node *fw_auxillary_input_to_input_weights(void) const { return at(40)->node(); }
+ void fw_auxillary_input_to_input_weights(loco::Node *node) { at(40)->node(node); }
+ loco::Node *fw_auxillary_input_to_forget_weights(void) const { return at(41)->node(); }
+ void fw_auxillary_input_to_forget_weights(loco::Node *node) { at(41)->node(node); }
+ loco::Node *fw_auxillary_input_to_cell_weights(void) const { return at(42)->node(); }
+ void fw_auxillary_input_to_cell_weights(loco::Node *node) { at(42)->node(node); }
+ loco::Node *fw_auxillary_input_to_output_weights(void) const { return at(43)->node(); }
+ void fw_auxillary_input_to_output_weights(loco::Node *node) { at(43)->node(node); }
+ loco::Node *bw_auxillary_input_to_input_weights(void) const { return at(44)->node(); }
+ void bw_auxillary_input_to_input_weights(loco::Node *node) { at(44)->node(node); }
+ loco::Node *bw_auxillary_input_to_forget_weights(void) const { return at(45)->node(); }
+ void bw_auxillary_input_to_forget_weights(loco::Node *node) { at(45)->node(node); }
+ loco::Node *bw_auxillary_input_to_cell_weights(void) const { return at(46)->node(); }
+ void bw_auxillary_input_to_cell_weights(loco::Node *node) { at(46)->node(node); }
+ loco::Node *bw_auxillary_input_to_output_weights(void) const { return at(47)->node(); }
+ void bw_auxillary_input_to_output_weights(loco::Node *node) { at(47)->node(node); }
+
+public:
+ float cell_clip(void) const { return _cell_clip; }
+ void cell_clip(float cell_clip) { _cell_clip = cell_clip; }
+ float proj_clip(void) const { return _proj_clip; }
+ void proj_clip(float proj_clip) { _proj_clip = proj_clip; }
+ bool merge_outputs(void) const { return _merge_outputs; }
+ void merge_outputs(bool merge_outputs) { _merge_outputs = merge_outputs; }
+ bool time_major(void) const { return _time_major; }
+ void time_major(bool time_major) { _time_major = time_major; }
+ bool asymmetric_quantize_inputs(void) const { return _asymmetric_quantize_inputs; }
+ void asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ _asymmetric_quantize_inputs = asymmetric_quantize_inputs;
+ }
+
+private:
+ float _cell_clip{0.0f};
+ float _proj_clip{0.0f};
+ bool _merge_outputs{false};
+ bool _time_major{false};
+ bool _asymmetric_quantize_inputs{false};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEBIDIRECTIONALSEQUENCE_LSTM_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_BIDIRECTIONAL_SEQUENCE_LSTM_OUT_H__
+#define __LUCI_IR_CIRCLE_BIDIRECTIONAL_SEQUENCE_LSTM_OUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT in Circle
+ */
+class CircleBidirectionalSequenceLSTMOut final
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ int32_t index(void) const { return _index; }
+ void index(int32_t index) { _index = index; }
+
+private:
+ int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_BIDIRECTIONAL_SEQUENCE_LSTM_OUT_H__
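
Editor's sketch (not part of the patch): one way the two new headers above could be used together. The main LSTM node carries the attributes, and each consumer reads one result through a virtual Out node selected by index. The nodes()->create<...>() factory call mirrors the pattern that appears later in this diff; the umbrella include path and the meaning of index 0 are assumptions.

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// Creates a BidirectionalSequenceLSTM node and selects its forward output.
void sketch_bidi_lstm(loco::Graph *g)
{
  auto lstm = g->nodes()->create<luci::CircleBidirectionalSequenceLSTM>();
  lstm->merge_outputs(false); // keep forward/backward outputs separate
  lstm->time_major(false);
  lstm->cell_clip(0.0f);      // 0.0f: no cell clipping
  lstm->proj_clip(0.0f);

  // Each virtual Out node picks one output of the multi-output op.
  auto fw_out = g->nodes()->create<luci::CircleBidirectionalSequenceLSTMOut>();
  fw_out->input(lstm);
  fw_out->index(0);           // assumed: 0 selects the forward output
}
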
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include "luci/IR/VariadicArityNode.h"
#include <cassert>
* @brief CONCATENATION in Circle
*/
class CircleConcatenation final
- : public VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
CircleConcatenation(uint32_t arity)
- : VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>(arity)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>(arity)
{
// TODO Support when arity is 0
assert(arity >= 1);
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <loco/IR/DataTypeTraits.h>
*/
class CircleConst final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLECONST>>
{
-public:
- CircleConst() = default;
-
public:
template <loco::DataType DT> uint32_t size(void) const;
template <loco::DataType DT> void size(uint32_t size);
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrDilation.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief CONV_2D in Circle
*/
class CircleConv2D final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::CONV_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
Dilation *dilation(void) { return &_dilation; }
private:
- Padding _padding = Padding::UNDEFINED;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Dilation _dilation;
};
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
class CircleCustom final : public VariadicArityNode<CircleNodeImpl<CircleOpcode::CUSTOM>>
{
public:
- CircleCustom(uint32_t arity) : VariadicArityNode<CircleNodeImpl<CircleOpcode::CUSTOM>>(arity)
+ CircleCustom(uint32_t arity, uint32_t out)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::CUSTOM>>(arity), _output_count(out)
{
// TODO Support when arity is 0
assert(arity >= 1);
+ assert(out > 0);
}
public:
uint32_t numInputs(void) const { return arity(); }
+ uint32_t numOutputs(void) const { return _output_count; }
public:
Node *inputs(uint32_t index) const { return at(index)->node(); }
void inputs(uint32_t index, Node *node) { at(index)->node(node); }
+public:
const std::vector<uint8_t> &custom_options(void) const { return _custom_options; }
void custom_options(const std::vector<uint8_t> &custom_options)
{
private:
std::vector<uint8_t> _custom_options;
std::string _custom_code;
+ uint32_t _output_count{0};
};
} // namespace luci
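
Editor's sketch (assumption, not from the patch): with the new two-argument constructor, the output count is fixed at creation time and exposed via numOutputs(), so a builder no longer has to infer it elsewhere. The umbrella include path is an assumption; all accessors used are the ones shown above.

#include <cassert>
#include <cstdint>
#include <vector>
#include <loco.h>
#include <luci/IR/CircleNodes.h>

// Creates a 2-input, 1-output custom op node.
void sketch_custom(loco::Graph *g, loco::Node *in0, loco::Node *in1)
{
  auto custom = g->nodes()->create<luci::CircleCustom>(2, 1);
  custom->inputs(0, in0);
  custom->inputs(1, in1);
  custom->custom_options(std::vector<uint8_t>{}); // e.g. serialized attributes
  assert(custom->numInputs() == 2);
  assert(custom->numOutputs() == 1);
}
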
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual CIRCLECUSTOMOUT in Circle
*/
class CircleCustomOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLECUSTOMOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLECUSTOMOUT>>
{
-public:
- CircleCustomOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief DEPTH_TO_SPACE in Circle
*/
class CircleDepthToSpace final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DEPTH_TO_SPACE>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DEPTH_TO_SPACE>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
public:
- int block_size(void) const { return _block_size; }
- void block_size(int block_size) { _block_size = block_size; }
+ int32_t block_size(void) const { return _block_size; }
+ void block_size(int32_t block_size) { _block_size = block_size; }
private:
- int _block_size{0};
+ int32_t _block_size{0};
};
} // namespace luci
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief DEPTHWISE_CONV_2D in Circle
*/
class CircleDepthwiseConv2D final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::DEPTHWISE_CONV_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::DEPTHWISE_CONV_2D>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
Dilation *dilation(void) { return &_dilation; }
private:
- Padding _padding = Padding::UNDEFINED;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
- int32_t _depth_multiplier = 0;
+ int32_t _depth_multiplier{0};
Dilation _dilation;
};
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief DIV in Circle
*/
class CircleDiv final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::DIV>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
-public:
- CircleDiv() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleElu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::ELU>>
{
-public:
- CircleElu() = default;
-
public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleExpandDims final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::EXPAND_DIMS>>
{
-public:
- CircleExpandDims() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_FAKE_QUANT_H__
+#define __LUCI_IR_CIRCLE_FAKE_QUANT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief FAKE_QUANT in Circle
+ * @note 'inputs' came from TF.quantize.fake_quant_from_min_max_vars
+ */
+class CircleFakeQuant final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::FAKE_QUANT>>
+{
+public:
+ loco::Node *inputs(void) const { return at(0)->node(); }
+ void inputs(loco::Node *node) { at(0)->node(node); }
+
+public:
+ float min(void) const { return _min; }
+ void min(float min) { _min = min; }
+
+ float max(void) const { return _max; }
+ void max(float max) { _max = max; }
+
+ int32_t num_bits(void) const { return _num_bits; }
+ void num_bits(int32_t num_bits) { _num_bits = num_bits; }
+
+ bool narrow_range(void) const { return _narrow_range; }
+ void narrow_range(bool narrow_range) { _narrow_range = narrow_range; }
+
+private:
+ float _min{0.0f};
+ float _max{0.0f};
+ int32_t _num_bits{0};
+ bool _narrow_range{false};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_FAKE_QUANT_H__
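
Editor's sketch (assumption, not from the patch): populating the new FAKE_QUANT node with the attribute setters defined above. The umbrella include path is an assumption.

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// Builds a FAKE_QUANT node that simulates 8-bit quantization over [-6, 6].
void sketch_fake_quant(loco::Graph *g, loco::Node *features)
{
  auto fq = g->nodes()->create<luci::CircleFakeQuant>();
  fq->inputs(features);  // single operand, stored at index 0
  fq->min(-6.0f);
  fq->max(6.0f);
  fq->num_bits(8);
  fq->narrow_range(false);
}
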
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief FULLY_CONNECTED in Circle
*/
class CircleFullyConnected final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::FULLY_CONNECTED>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::FULLY_CONNECTED>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
enum class WeightsFormat
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void axis(int32_t axis) { _axis = axis; }
private:
- int32_t _axis = 0;
+ int32_t _axis{0};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief GREATER EQUAL in Circle
*/
class CircleGreaterEqual final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::GREATER_EQUAL>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::GREATER_EQUAL>>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
{
public:
CircleIf(uint32_t arity, uint32_t out)
- : VariadicArityNode<CircleNodeImpl<CircleOpcode::IF>>(arity + 1), _output_count(out)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::IF>>(arity + 1), _output_count(out)
{
assert(arity > 0);
assert(out > 0);
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleIfOut final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEIFOUT>>
{
-public:
- CircleIfOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <loco/IR/DataTypeTraits.h>
#include <loco/IR/GraphInputIndex.h>
*/
class CircleInput final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEINPUT>>
{
-public:
- CircleInput() = default;
-
public:
void index(const loco::GraphInputIndex &index);
loco::GraphInputIndex index(void) const;
bool indexed(void) const { return _index != -1; }
private:
- int64_t _index = -1; // Uninitialized
+ int64_t _index{-1}; // Uninitialized
};
} // namespace luci
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief INSTANCE_NORM in Circle
*/
class CircleInstanceNorm final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
/// @note Currently only FLOAT32 is supported as the input node
loco::Node *beta(void) const { return at(2)->node(); }
void beta(loco::Node *node) { at(2)->node(node); }
+public:
float epsilon() const { return _epsilon; }
void epsilon(float epsilon) { _epsilon = epsilon; }
private:
- float _epsilon = 1e-05;
+ float _epsilon{1e-05};
};
} // namespace luci
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief L2_NORMALIZATION in Circle
*/
class CircleL2Normalize final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::L2_NORMALIZATION>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::L2_NORMALIZATION>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief L2_POOL_2D in Circle
*/
class CircleL2Pool2D final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::L2_POOL_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
-public:
- CircleL2Pool2D() : _padding(Padding::UNDEFINED) { /* empty */}
-
public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
+public:
Padding padding() const { return _padding; }
void padding(Padding padding) { _padding = padding; }
Stride *stride(void) { return &_stride; }
private:
- Padding _padding;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Filter _filter;
};
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleLeakyRelu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LEAKY_RELU>>
{
-public:
- CircleLeakyRelu() = default;
-
public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
+public:
float alpha() const { return _alpha; }
void alpha(float alpha) { _alpha = alpha; }
private:
- float _alpha = 0.2f;
+ float _alpha{0.2f};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief LOCAL_RESPONSE_NORMALIZATION in Circle
*/
class CircleLocalResponseNormalization final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LOCAL_RESPONSE_NORMALIZATION>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LOCAL_RESPONSE_NORMALIZATION>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleLogistic final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LOGISTIC>>
{
-public:
- CircleLogistic() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief MATRIX_SET_DIAG in Circle
*/
class CircleMatrixSetDiag final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MATRIX_SET_DIAG>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MATRIX_SET_DIAG>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief MAX_POOL_2D in Circle
*/
class CircleMaxPool2D final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::MAX_POOL_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
-public:
- CircleMaxPool2D() : _padding(Padding::UNDEFINED) { /* empty */}
-
public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
+public:
Padding padding() const { return _padding; }
void padding(Padding padding) { _padding = padding; }
Stride *stride(void) { return &_stride; }
private:
- Padding _padding;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Filter _filter;
};
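
Editor's sketch (assumption): configuring the pooling node now that the padding default has moved into the member initializer. The w()/h() accessors on Stride/Filter, the filter() getter, and the fusedActivationFunction() mixin accessor are not shown in this diff and are assumed from luci's attribute headers.

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// 2x2 max pooling with stride 2 and VALID padding.
void sketch_max_pool(loco::Graph *g, loco::Node *value)
{
  auto pool = g->nodes()->create<luci::CircleMaxPool2D>();
  pool->value(value);
  pool->padding(luci::Padding::VALID); // must be set; default is UNDEFINED
  pool->stride()->w(2);
  pool->stride()->h(2);
  pool->filter()->w(2);
  pool->filter()->h(2);
  pool->fusedActivationFunction(luci::FusedActFunc::NONE);
}
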
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include "luci/IR/AttrMirrorPadMode.h"
namespace luci
*/
class CircleMirrorPad final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MIRROR_PAD>>
{
-public:
- CircleMirrorPad() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief MUL in Circle
*/
class CircleMul final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MUL>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief NON_MAX_SUPPRESSION_V4 in Circle
*/
class CircleNonMaxSuppressionV4 final
- : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V4>>
+ : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V4>>
{
public:
loco::Node *boxes(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual NONMAXSUPPRESSIONV4OUT in Circle
*/
class CircleNonMaxSuppressionV4Out final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT>>
{
-public:
- CircleNonMaxSuppressionV4Out() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief NON_MAX_SUPPRESSION_V5 in Circle
*/
class CircleNonMaxSuppressionV5 final
- : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>>
+ : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>>
{
public:
loco::Node *boxes(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual NONMAXSUPPRESSIONV5OUT in Circle
*/
class CircleNonMaxSuppressionV5Out final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>>
{
-public:
- CircleNonMaxSuppressionV5Out() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void axis(int32_t axis) { _axis = axis; }
private:
- int32_t _axis = -1;
+ int32_t _axis{-1};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <loco/IR/GraphOutputIndex.h>
class CircleOutput final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUT>>
{
public:
- CircleOutput() = default;
-
void index(const loco::GraphOutputIndex &index);
loco::GraphOutputIndex index(void) const;
void from(loco::Node *node) { at(0)->node(node); }
private:
- int64_t _index = -1; // Uninitialized
+ int64_t _index{-1}; // Uninitialized
};
/**
*/
// TODO remove CircleOutputDummy
class CircleOutputDummy final
- : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTDUMMY>>
+ : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTDUMMY>>
{
public:
CircleOutputDummy() = default;
* @brief CircleOutputExclude is used to specify nodes that are not exported
*/
class CircleOutputExclude final
- : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTEXCLUDE>>
+ : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTEXCLUDE>>
{
public:
CircleOutputExclude() = default;
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CirclePRelu final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::PRELU>>
{
-public:
- CirclePRelu() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CirclePad final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::PAD>>
{
-public:
- CirclePad() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CirclePadV2 final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::PADV2>>
{
-public:
- CirclePadV2() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CirclePow final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::POW>>
{
-public:
- CirclePow() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleRelu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU>>
{
-public:
- CircleRelu() = default;
-
public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleRelu6 final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU6>>
{
-public:
- CircleRelu6() = default;
-
public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleReluN1To1 final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU_N1_TO_1>>
{
-public:
- CircleReluN1To1() = default;
-
public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleReshape final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESHAPE>>
{
-public:
- CircleReshape() = default;
-
public:
loco::Node *tensor(void) const { return at(0)->node(); }
void tensor(loco::Node *node) { at(0)->node(node); }
// NOTE shape is optional and can be CircleConst or any other type
- // and also can be CircleOutputDummy when reshape option does not exist
+ // and also should be CircleOutputDummy when reshape option does not exist
loco::Node *shape(void) const { return at(1)->node(); }
void shape(loco::Node *node) { at(1)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief RESIZE_BILINEAR in Circle
*/
class CircleResizeBilinear final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_BILINEAR>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_BILINEAR>>
{
-public:
- CircleResizeBilinear() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
loco::Node *size(void) const { return at(1)->node(); }
void size(loco::Node *node) { at(1)->node(node); }
+public:
bool align_corners() const { return _align_corners; }
void align_corners(bool value) { _align_corners = value; }
void half_pixel_centers(bool value) { _half_pixel_centers = value; }
private:
- bool _align_corners = false;
- bool _half_pixel_centers = false;
+ bool _align_corners{false};
+ bool _half_pixel_centers{false};
};
} // namespace luci
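
Editor's sketch (assumption, not from the patch): a resize node built with the accessors shown above; the umbrella include path is an assumption.

#include <loco.h>
#include <luci/IR/CircleNodes.h>

// RESIZE_BILINEAR with half-pixel centers and no corner alignment.
void sketch_resize(loco::Graph *g, loco::Node *input, loco::Node *size)
{
  auto resize = g->nodes()->create<luci::CircleResizeBilinear>();
  resize->input(input);
  resize->size(size); // 1-D tensor holding the target [height, width]
  resize->align_corners(false);
  resize->half_pixel_centers(true);
}
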
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief RESIZE_NEAREST_NEIGHBOR in Circle
*/
class CircleResizeNearestNeighbor final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_NEAREST_NEIGHBOR>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_NEAREST_NEIGHBOR>>
{
-public:
- CircleResizeNearestNeighbor() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
loco::Node *size(void) const { return at(1)->node(); }
void size(loco::Node *node) { at(1)->node(node); }
+public:
bool align_corners() const { return _align_corners; }
void align_corners(bool value) { _align_corners = value; }
private:
- bool _align_corners = false;
+ bool _align_corners{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief REVERSE_SEQUENCE in Circle
*/
class CircleReverseSequence final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::REVERSE_SEQUENCE>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::REVERSE_SEQUENCE>>
{
-public:
- CircleReverseSequence() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
void seq_lengths(loco::Node *node) { at(1)->node(node); }
public:
- int seq_axis(void) const { return _seq_axis; }
- void seq_axis(int seq_axis) { _seq_axis = seq_axis; }
+ int32_t seq_axis(void) const { return _seq_axis; }
+ void seq_axis(int32_t seq_axis) { _seq_axis = seq_axis; }
- int batch_axis(void) const { return _batch_axis; }
- void batch_axis(int batch_axis) { _batch_axis = batch_axis; }
+ int32_t batch_axis(void) const { return _batch_axis; }
+ void batch_axis(int32_t batch_axis) { _batch_axis = batch_axis; }
private:
- int _seq_axis{0};
- int _batch_axis{0};
+ int32_t _seq_axis{0};
+ int32_t _batch_axis{0};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleRound final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::ROUND>>
{
-public:
- CircleRound() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleRsqrt final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RSQRT>>
{
-public:
- CircleRsqrt() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSegmentSum final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SEGMENT_SUM>>
{
-public:
- CircleSegmentSum() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSelect final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SELECT>>
{
-public:
- CircleSelect() = default;
-
public:
loco::Node *condition(void) const { return at(0)->node(); }
void condition(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSelectV2 final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SELECT_V2>>
{
-public:
- CircleSelectV2() = default;
-
public:
loco::Node *condition(void) const { return at(0)->node(); }
void condition(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleShape final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SHAPE>>
{
-public:
- CircleShape() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief SPACE_TO_BATCH_ND in Circle
*/
class CircleSpaceToBatchND final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SPACE_TO_BATCH_ND>>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SPACE_TO_BATCH_ND>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief SPACE_TO_DEPTH in Circle
*/
class CircleSpaceToDepth final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SPACE_TO_DEPTH>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SPACE_TO_DEPTH>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
public:
- int block_size(void) const { return _block_size; }
- void block_size(int block_size) { _block_size = block_size; }
+ int32_t block_size(void) const { return _block_size; }
+ void block_size(int32_t block_size) { _block_size = block_size; }
private:
- int _block_size{0};
+ int32_t _block_size{0};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief SPARSE_TO_DENSE in Circle
*/
class CircleSparseToDense final
- : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::SPARSE_TO_DENSE>>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::SPARSE_TO_DENSE>>
{
public:
loco::Node *indices(void) const { return at(0)->node(); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSplitOut final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLESPLITOUT>>
{
-public:
- CircleSplitOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual CIRCLESPLITVOUT in Circle
*/
class CircleSplitVOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLESPLITVOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLESPLITVOUT>>
{
-public:
- CircleSplitVOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSqrt final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SQRT>>
{
-public:
- CircleSqrt() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSquare final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SQUARE>>
{
-public:
- CircleSquare() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief SQUARED_DIFFERENCE in Circle
*/
class CircleSquaredDifference final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SQUARED_DIFFERENCE>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SQUARED_DIFFERENCE>>
{
-public:
- CircleSquaredDifference() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleSqueeze final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SQUEEZE>>
{
-public:
- CircleSqueeze() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief STRIDED_SLICE in Circle
*/
class CircleStridedSlice final
- : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::STRIDED_SLICE>>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::STRIDED_SLICE>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief SUB in Circle
*/
class CircleSub final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SUB>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
-public:
- CircleSub() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleTanh final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::TANH>>
{
-public:
- CircleTanh() = default;
-
public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleTile final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::TILE>>
{
-public:
- CircleTile() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleTopKV2 final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::TOPK_V2>>
{
-public:
- CircleTopKV2() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual CIRCLETOPKV2OUT in Circle
*/
class CircleTopKV2Out final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLETOPKV2OUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLETOPKV2OUT>>
{
-public:
- CircleTopKV2Out() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
class CircleTranspose final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::TRANSPOSE>>
{
public:
- CircleTranspose() = default;
-
-public:
- /// @brief Get the input node to transpose
loco::Node *a(void) const { return at(0)->node(); }
-
- /// @brief Set the input node to transpose
void a(loco::Node *node) { at(0)->node(node); }
loco::Node *perm(void) const { return at(1)->node(); }
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* 'out' actually means 'out' and 'in' of this node.
*/
class CircleTransposeConv final
- : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *inputSizes(void) const { return at(0)->node(); }
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief UNIDIRECTIONAL_SEQUENCE_LSTM in Circle
*/
class CircleUnidirectionalSequenceLSTM final
- : public FixedArityNode<24, CircleNodeImpl<CircleOpcode::UNIDIRECTIONAL_SEQUENCE_LSTM>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<24, CircleNodeImpl<CircleOpcode::UNIDIRECTIONAL_SEQUENCE_LSTM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
}
private:
- float _cell_clip = 0.0f;
- float _proj_clip = 0.0f;
- bool _time_major = false;
- bool _asymmetric_quantize_inputs = false;
+ float _cell_clip{0.0f};
+ float _proj_clip{0.0f};
+ bool _time_major{false};
+ bool _asymmetric_quantize_inputs{false};
};
} // namespace luci
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
public:
loco::DataType idx_out_type(void) const { return _idx_out_type; }
- void output_type(loco::DataType ot) { _idx_out_type = ot; }
+ void idx_out_type(loco::DataType ot) { _idx_out_type = ot; }
private:
loco::DataType _idx_out_type{loco::DataType::S32};
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual CIRCLEUNIQUEOUT in Circle
*/
class CircleUniqueOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNIQUEOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNIQUEOUT>>
{
-public:
- CircleUniqueOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleUnpack final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::UNPACK>>
{
-public:
- CircleUnpack() = default;
-
public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
* @brief Virtual CIRCLEUNPACKOUT in Circle
*/
class CircleUnpackOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNPACKOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNPACKOUT>>
{
-public:
- CircleUnpackOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <cassert>
*/
class CircleWhere final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::WHERE>>
{
-public:
- CircleWhere() = default;
-
public:
loco::Node *condition() const { return at(0)->node(); }
void condition(loco::Node *node) { at(0)->node(node); }
{
public:
CircleWhile(uint32_t arity, uint32_t out)
- : VariadicArityNode<CircleNodeImpl<CircleOpcode::WHILE>>(arity), _output_count(out)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::WHILE>>(arity), _output_count(out)
{
assert(arity > 0);
assert(out > 0);
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
*/
class CircleWhileOut final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEWHILEOUT>>
{
-public:
- CircleWhileOut() = default;
-
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
class CircleZerosLike final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::ZEROS_LIKE>>
{
public:
- CircleZerosLike() = default;
-
-public:
- /// @brief Get the input node
loco::Node *input(void) const { return at(0)->node(); }
-
- /// @brief Set the input node
void input(loco::Node *node) { at(0)->node(node); }
};
public:
SparseIndexVector() = default;
SparseIndexVector(const SparseIndexVectorType &type, const std::vector<int32_t> &sparse_index_vec)
- : _type{type}
+ : _type{type}
{
switch (type)
{
case SparseIndexVectorType::I32:
{
_vec_ptr = static_cast<void *>(
- new std::vector<int32_t>(sparse_index_vec.begin(), sparse_index_vec.end()));
+ new std::vector<int32_t>(sparse_index_vec.begin(), sparse_index_vec.end()));
break;
}
case SparseIndexVectorType::U16:
case SparseIndexVectorType::I32:
{
const std::vector<int32_t> *vec =
- static_cast<const std::vector<int32_t> *>(sparse_index_vec);
+ static_cast<const std::vector<int32_t> *>(sparse_index_vec);
_vec_ptr = static_cast<void *>(new std::vector<int32_t>(vec->begin(), vec->end()));
break;
}
case SparseIndexVectorType::U16:
{
const std::vector<uint16_t> *vec =
- static_cast<const std::vector<uint16_t> *>(sparse_index_vec);
+ static_cast<const std::vector<uint16_t> *>(sparse_index_vec);
_vec_ptr = static_cast<void *>(new std::vector<uint16_t>(vec->begin(), vec->end()));
break;
}
case SparseIndexVectorType::U8:
{
const std::vector<uint8_t> *vec =
- static_cast<const std::vector<uint8_t> *>(sparse_index_vec);
+ static_cast<const std::vector<uint8_t> *>(sparse_index_vec);
_vec_ptr = static_cast<void *>(new std::vector<uint8_t>(vec->begin(), vec->end()));
break;
}
}
SparseIndexVector(const SparseIndexVector &sparse_index_vec)
- : SparseIndexVector(sparse_index_vec._type, sparse_index_vec._vec_ptr)
+ : SparseIndexVector(sparse_index_vec._type, sparse_index_vec._vec_ptr)
{
}
SparseIndexVector(SparseIndexVector &&sparse_index_vec)
- : _type{sparse_index_vec._type}, _vec_ptr{std::exchange(sparse_index_vec._vec_ptr, nullptr)}
+ : _type{sparse_index_vec._type}, _vec_ptr{std::exchange(sparse_index_vec._vec_ptr, nullptr)}
{
}
const std::vector<uint16_t> *as_uint16_vector(void) const
{
return _type == SparseIndexVectorType::U16
- ? static_cast<const std::vector<uint16_t> *>(_vec_ptr)
- : nullptr;
+ ? static_cast<const std::vector<uint16_t> *>(_vec_ptr)
+ : nullptr;
}
const std::vector<uint8_t> *as_uint8_vector(void) const
{
}
DimMetaData(DimensionType format, int32_t dense_size, const SparseIndexVector &array_segments,
const SparseIndexVector &array_indices)
- : _format{format}, _dense_size{dense_size}, _array_segments{array_segments},
- _array_indices{array_indices}
+ : _format{format}, _dense_size{dense_size}, _array_segments{array_segments}, _array_indices{
+ array_indices}
{
// DO NOTHING
}
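
Editor's sketch (assumption): building sparsity metadata for one dimension with the constructors shown above. The DimensionType enumerator and the SparsityParam.h header name are assumptions taken from luci's sparsity support.

#include <cstdint>
#include <vector>
#include <luci/IR/SparsityParam.h>

// CSR-style metadata for one sparse dimension: segment offsets plus indices.
luci::DimMetaData make_sparse_dim(void)
{
  std::vector<int32_t> segments{0, 2, 4};
  std::vector<int32_t> indices{0, 1, 0, 2};
  luci::SparseIndexVector seg{luci::SparseIndexVectorType::I32, segments};
  luci::SparseIndexVector idx{luci::SparseIndexVectorType::I32, indices};
  return luci::DimMetaData{luci::DimensionType::SPARSE_CSR, 0, seg, idx};
}
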
*/
#include "luci/IR/CircleDialect.h"
+#include "luci/IR/DeadNodeQueryService.h"
#include "luci/IR/Nodes/CircleInput.h"
#include "luci/IR/Nodes/CircleOutput.h"
#include <loco/IR/GraphInputIndex.h>
#include <loco/IR/GraphOutputIndex.h>
-#include "DeadNodeQueryService.h"
-
#include <cassert>
#include <memory>
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This is to validate CircleNodeMixins.h
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
-void set_new_shape(CircleReshape *node, int32_t *base, uint32_t size)
-{
- // Check node does not have both of new shape infos
- LUCI_ASSERT(node->shape() == nullptr, "node already has shape input");
- LUCI_ASSERT(node->newShape()->rank() == 0, "node already has newShape attribute");
-
- const loco::DataType S32 = loco::DataType::S32;
-
- // Set 2nd input as CircleConst
- auto const_shape_node = node->graph()->nodes()->create<CircleConst>();
- const_shape_node->rank(1);
- const_shape_node->dim(0) = size;
- const_shape_node->dtype(S32);
- const_shape_node->size<S32>(size);
- const_shape_node->shape_status(luci::ShapeStatus::VALID);
- for (uint32_t axis = 0; axis < size; ++axis)
- const_shape_node->at<S32>(axis) = base[axis];
- node->shape(const_shape_node);
-
- // Set newShape attribute
- node->newShape()->rank(size);
- for (uint32_t axis = 0; axis < size; ++axis)
- node->newShape()->dim(axis) = base[axis];
-}
-
void link(loco::GraphOutput *output, CircleOutput *node) { node->index(output->index()); }
CircleOutput *output_node(loco::Graph *g, const loco::GraphOutputIndex &index)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/IR/CircleShapeSignature.h"
-
-namespace luci
-{
-
-bool operator==(const ShapeSignature &lhs, const ShapeSignature &rhs)
-{
- if (lhs.rank() != rhs.rank())
- return false;
-
- for (uint32_t i = 0; i < lhs.rank(); ++i)
- if (lhs.dim(i) != rhs.dim(i))
- return false;
-
- return true;
-}
-
-} // namespace luci
* limitations under the License.
*/
-#include "DeadNodeQueryService.h"
-
#include "luci/IR/CircleNodeVisitor.h"
+#include "luci/IR/DeadNodeQueryService.h"
#include <loco/IR/Graph.h>
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_LANG_DEADNODEQUERYSERVICE_H__
-#define __LUCI_LANG_DEADNODEQUERYSERVICE_H__
-
-#include <logo/DeadNodeQueryService.h>
-
-#include <loco/IR/Node.h>
-
-namespace luci
-{
-
-struct DeadNodeQueryServiceImpl final : public logo::DeadNodeQueryService
-{
- bool isDeadNode(loco::Node *node) final;
-};
-
-} // namespace luci
-
-#endif // __LUCI_LANG_DEADNODEQUERYSERVICE_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// This is to validate LuciNodeMixins.h
-#include "luci/IR/LuciNodeMixins.h"
luci::CircleBatchMatMul batchmatmul_node;
ASSERT_EQ(luci::CircleDialect::get(), batchmatmul_node.dialect());
- ASSERT_EQ(luci::CircleOpcode::BATCHMATMUL, batchmatmul_node.opcode());
+ ASSERT_EQ(luci::CircleOpcode::BATCH_MATMUL, batchmatmul_node.opcode());
ASSERT_EQ(nullptr, batchmatmul_node.x());
ASSERT_EQ(nullptr, batchmatmul_node.y());
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleBidirectionalSequenceLSTM.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleBidirectionalSequenceLSTMTest, constructor_P)
+{
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), trc_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM, trc_node.opcode());
+
+ ASSERT_EQ(nullptr, trc_node.input());
+
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.fw_cell_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.fw_input_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.fw_forget_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.fw_output_gate_bias());
+
+ ASSERT_EQ(nullptr, trc_node.fw_projection_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_projection_bias());
+
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.bw_cell_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.bw_input_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.bw_forget_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.bw_output_gate_bias());
+
+ ASSERT_EQ(nullptr, trc_node.bw_projection_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_projection_bias());
+
+ ASSERT_EQ(nullptr, trc_node.fw_activation_state());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_state());
+ ASSERT_EQ(nullptr, trc_node.bw_activation_state());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_state());
+
+ ASSERT_EQ(nullptr, trc_node.auxillary_input());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_output_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_output_weights());
+
+ ASSERT_EQ(luci::FusedActFunc::UNDEFINED, trc_node.fusedActivationFunction());
+ ASSERT_EQ(0.f, trc_node.cell_clip());
+ ASSERT_EQ(0.f, trc_node.proj_clip());
+ ASSERT_EQ(false, trc_node.merge_outputs());
+ ASSERT_EQ(false, trc_node.time_major());
+ ASSERT_EQ(false, trc_node.asymmetric_quantize_inputs());
+}
+
+TEST(CircleBidirectionalSequenceLSTMTest, arity_NEG)
+{
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ ASSERT_NO_THROW(trc_node.arg(36));
+ ASSERT_THROW(trc_node.arg(48), std::out_of_range);
+}
+
+TEST(CircleBidirectionalSequenceLSTMTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(trc_node.accept(&tv), std::exception);
+}
+
+TEST(CircleBidirectionalSequenceLSTMTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(trc_node.accept(&tv), std::exception);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleConst.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleConstTest, constructor)
+{
+ luci::CircleConst const_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), const_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::CIRCLECONST, const_node.opcode());
+}
+
+TEST(CircleConstTest, dtype_size)
+{
+ luci::CircleConst const_node;
+
+ const_node.dtype(loco::DataType::S32);
+ const_node.size<loco::DataType::S32>(1);
+
+ ASSERT_EQ(loco::DataType::S32, const_node.dtype());
+ ASSERT_EQ(1, const_node.size<loco::DataType::S32>());
+}
+
+TEST(CircleConstTest, scalar)
+{
+ luci::CircleConst const_node;
+
+ const_node.dtype(loco::DataType::S32);
+ const_node.size<loco::DataType::S32>(1);
+ const_node.scalar<loco::DataType::S32>() = 1;
+
+ auto const &cs = const_node.scalar<loco::DataType::S32>();
+ ASSERT_EQ(1, cs);
+}
TEST(CircleCustomTest, constructor)
{
- luci::CircleCustom custom_node(2);
+ luci::CircleCustom custom_node(2, 1);
ASSERT_EQ(luci::CircleDialect::get(), custom_node.dialect());
ASSERT_EQ(luci::CircleOpcode::CUSTOM, custom_node.opcode());
ASSERT_EQ(2, custom_node.numInputs());
ASSERT_EQ(0, custom_node.custom_code().size());
+ ASSERT_EQ(1, custom_node.numOutputs());
}
TEST(CircleCustomTest, constructor_NEG)
{
- ASSERT_DEBUG_DEATH(luci::CircleCustom{0}, "");
+ ASSERT_DEBUG_DEATH(luci::CircleCustom(0, 0), "");
SUCCEED();
}
TEST(CircleCustomTest, invalidIndex_NEG)
{
- luci::CircleCustom custom_node(2);
+ luci::CircleCustom custom_node(2, 1);
EXPECT_ANY_THROW(custom_node.arg(5));
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleFakeQuant.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleFakeQuantTest, constructor_P)
+{
+ luci::CircleFakeQuant fakequant;
+
+ ASSERT_EQ(fakequant.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(fakequant.opcode(), luci::CircleOpcode::FAKE_QUANT);
+
+ ASSERT_EQ(nullptr, fakequant.inputs());
+ ASSERT_EQ(0.0f, fakequant.min());
+ ASSERT_EQ(0.0f, fakequant.max());
+ ASSERT_EQ(0, fakequant.num_bits());
+ ASSERT_FALSE(fakequant.narrow_range());
+}
#define CIRCLE_NODE(OPCODE, CLASS) \
case luci::CircleOpcode::OPCODE: \
return prefix + #OPCODE;
+#define CIRCLE_VNODE CIRCLE_NODE
#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
default:
break;
s.state(locop::NodeSummary::State::PartiallyKnown); \
return true; \
}
+#define CIRCLE_VNODE CIRCLE_NODE
#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
protected:
IMPLEMENT(luci::CircleAveragePool2D)
IMPLEMENT(luci::CircleBatchMatMul)
IMPLEMENT(luci::CircleBatchToSpaceND)
+ IMPLEMENT(luci::CircleBidirectionalSequenceLSTM)
IMPLEMENT(luci::CircleCast)
IMPLEMENT(luci::CircleCeil)
IMPLEMENT(luci::CircleConcatenation)
IMPLEMENT(luci::CircleElu)
IMPLEMENT(luci::CircleExp)
IMPLEMENT(luci::CircleExpandDims)
+ IMPLEMENT(luci::CircleFakeQuant)
IMPLEMENT(luci::CircleFill)
IMPLEMENT(luci::CircleFloor)
IMPLEMENT(luci::CircleFloorDiv)
return true;
}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBidirectionalSequenceLSTM *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("input", tbl->lookup(node->input()));
+
+ s.args().append("fw_input_to_input_weights", tbl->lookup(node->fw_input_to_input_weights()));
+ s.args().append("fw_input_to_forget_weights", tbl->lookup(node->fw_input_to_forget_weights()));
+ s.args().append("fw_input_to_cell_weights", tbl->lookup(node->fw_input_to_cell_weights()));
+ s.args().append("fw_input_to_output_weights", tbl->lookup(node->fw_input_to_output_weights()));
+
+ s.args().append("fw_recurrent_to_input_weights",
+ tbl->lookup(node->fw_recurrent_to_input_weights()));
+ s.args().append("fw_recurrent_to_forget_weights",
+ tbl->lookup(node->fw_recurrent_to_forget_weights()));
+ s.args().append("fw_recurrent_to_cell_weights",
+ tbl->lookup(node->fw_recurrent_to_cell_weights()));
+ s.args().append("fw_recurrent_to_output_weights",
+ tbl->lookup(node->fw_recurrent_to_output_weights()));
+
+ s.args().append("fw_cell_to_input_weights", tbl->lookup(node->fw_cell_to_input_weights()));
+ s.args().append("fw_cell_to_forget_weights", tbl->lookup(node->fw_cell_to_forget_weights()));
+ s.args().append("fw_cell_to_output_weights", tbl->lookup(node->fw_cell_to_output_weights()));
+
+ s.args().append("fw_input_gate_bias", tbl->lookup(node->fw_input_gate_bias()));
+ s.args().append("fw_forget_gate_bias", tbl->lookup(node->fw_forget_gate_bias()));
+ s.args().append("fw_cell_gate_bias", tbl->lookup(node->fw_cell_gate_bias()));
+ s.args().append("fw_output_gate_bias", tbl->lookup(node->fw_output_gate_bias()));
+
+ s.args().append("fw_projection_weights", tbl->lookup(node->fw_projection_weights()));
+ s.args().append("fw_projection_bias", tbl->lookup(node->fw_projection_bias()));
+
+ s.args().append("bw_input_to_input_weights", tbl->lookup(node->bw_input_to_input_weights()));
+ s.args().append("bw_input_to_forget_weights", tbl->lookup(node->bw_input_to_forget_weights()));
+ s.args().append("bw_input_to_cell_weights", tbl->lookup(node->bw_input_to_cell_weights()));
+ s.args().append("bw_input_to_output_weights", tbl->lookup(node->bw_input_to_output_weights()));
+
+ s.args().append("bw_recurrent_to_input_weights",
+ tbl->lookup(node->bw_recurrent_to_input_weights()));
+ s.args().append("bw_recurrent_to_forget_weights",
+ tbl->lookup(node->bw_recurrent_to_forget_weights()));
+ s.args().append("bw_recurrent_to_cell_weights",
+ tbl->lookup(node->bw_recurrent_to_cell_weights()));
+ s.args().append("bw_recurrent_to_output_weights",
+ tbl->lookup(node->bw_recurrent_to_output_weights()));
+
+ s.args().append("bw_cell_to_input_weights", tbl->lookup(node->bw_cell_to_input_weights()));
+ s.args().append("bw_cell_to_forget_weights", tbl->lookup(node->bw_cell_to_forget_weights()));
+ s.args().append("bw_cell_to_output_weights", tbl->lookup(node->bw_cell_to_output_weights()));
+
+ s.args().append("bw_input_gate_bias", tbl->lookup(node->bw_input_gate_bias()));
+ s.args().append("bw_forget_gate_bias", tbl->lookup(node->bw_forget_gate_bias()));
+ s.args().append("bw_cell_gate_bias", tbl->lookup(node->bw_cell_gate_bias()));
+ s.args().append("bw_output_gate_bias", tbl->lookup(node->bw_output_gate_bias()));
+
+ s.args().append("bw_projection_weights", tbl->lookup(node->bw_projection_weights()));
+ s.args().append("bw_projection_bias", tbl->lookup(node->bw_projection_bias()));
+
+ s.args().append("fw_activation_state", tbl->lookup(node->fw_activation_state()));
+ s.args().append("fw_cell_state", tbl->lookup(node->fw_cell_state()));
+ s.args().append("bw_activation_state", tbl->lookup(node->bw_activation_state()));
+ s.args().append("bw_cell_state", tbl->lookup(node->bw_cell_state()));
+
+ s.args().append("auxillary_input", tbl->lookup(node->auxillary_input()));
+ s.args().append("fw_auxillary_input_to_input_weights",
+ tbl->lookup(node->fw_auxillary_input_to_input_weights()));
+ s.args().append("fw_auxillary_input_to_forget_weights",
+ tbl->lookup(node->fw_auxillary_input_to_forget_weights()));
+ s.args().append("fw_auxillary_input_to_cell_weights",
+ tbl->lookup(node->fw_auxillary_input_to_cell_weights()));
+ s.args().append("fw_auxillary_input_to_output_weights",
+ tbl->lookup(node->fw_auxillary_input_to_output_weights()));
+ s.args().append("bw_auxillary_input_to_input_weights",
+ tbl->lookup(node->bw_auxillary_input_to_input_weights()));
+ s.args().append("bw_auxillary_input_to_forget_weights",
+ tbl->lookup(node->bw_auxillary_input_to_forget_weights()));
+ s.args().append("bw_auxillary_input_to_cell_weights",
+ tbl->lookup(node->bw_auxillary_input_to_cell_weights()));
+ s.args().append("bw_auxillary_input_to_output_weights",
+ tbl->lookup(node->bw_auxillary_input_to_output_weights()));
+
+ s.args().append("cell_clip", to_str(node->cell_clip()));
+ s.args().append("proj_clip", to_str(node->proj_clip()));
+ s.args().append("merge_outputs", to_str(node->merge_outputs()));
+ s.args().append("time_major", to_str(node->time_major()));
+ s.args().append("asymmetric_quantize_inputs", to_str(node->asymmetric_quantize_inputs()));
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node,
locop::NodeSummary &s)
{
return true;
}
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFakeQuant *node,
+ locop::NodeSummary &s)
+{
+ s.args().append("inputs", tbl->lookup(node->inputs()));
+ s.args().append("min", pepper::str(node->min()));
+ s.args().append("max", pepper::str(node->max()));
+ s.args().append("num_bits", pepper::str(node->num_bits()));
+ s.args().append("narrow_range", node->narrow_range() ? "true" : "false");
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node,
locop::NodeSummary &s)
{
s.comments().append("Mem = " + ptr_to_str(node)); \
return summary(dynamic_cast<const CLASS *>(node), s); \
}
+#define CIRCLE_VNODE CIRCLE_NODE
#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
return false;
return summary_node(tbl(), node, s);
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleBidirectionalSequenceLSTM *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
{
return summary_node(tbl(), node, s);
return summary_node(tbl(), node, s);
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleFakeQuant *node,
+ locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
+{
+ return summary_node(tbl(), node, s);
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const
{
return use_x(tbl(), node, s);
return use_xy(tbl(), node, s);
}
-bool CircleNodeSummaryBuilder::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
bool CircleNodeSummaryBuilder::summary(const luci::CircleFullyConnected *node,
locop::NodeSummary &s) const
{
--- /dev/null
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(luci_partition SHARED ${SOURCES})
+target_include_directories(luci_partition PRIVATE src)
+target_include_directories(luci_partition PUBLIC include)
+target_link_libraries(luci_partition PUBLIC luci_lang)
+target_link_libraries(luci_partition PRIVATE luci_service)
+target_link_libraries(luci_partition PRIVATE luci_log)
+target_link_libraries(luci_partition PRIVATE luci_logex)
+target_link_libraries(luci_partition PRIVATE mio_circle)
+target_link_libraries(luci_partition PRIVATE nncc_common)
+target_link_libraries(luci_partition PRIVATE oops)
+
+install(TARGETS luci_partition DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_partition_test ${TESTS})
+target_include_directories(luci_partition_test PRIVATE src)
+target_link_libraries(luci_partition_test luci_lang)
+target_link_libraries(luci_partition_test luci_partition)
+target_link_libraries(luci_partition_test luci_testhelper)
+target_link_libraries(luci_partition_test luci_service)
--- /dev/null
+# luci-partition
+
+`luci-partition` provides partitioning of a model into two or more sub-models,
+together with their connection configuration, while preserving the same
+computational results.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_H__
+#define __LUCI_PARTITION_H__
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * @brief PartitionTable holds partition information
+ */
+struct PartitionTable
+{
+ std::vector<std::string> groups;
+ std::string default_group;
+
+ // assign by opcode name: OPCODENAME=group
+ std::unordered_map<std::string /* OPCODENAME */, std::string /* group */> byopcodes;
+
+ // TODO add assign by OP name
+};
+
+/**
+ * @brief PartedModule holds partitioned module and group name
+ */
+struct PartedModule
+{
+ std::unique_ptr<Module> module;
+ // group name used to partition this module
+ std::string group;
+
+  // unique name (filename) of this module
+ std::string name;
+};
+
+struct PartedModules
+{
+ std::vector<PartedModule> pmodules;
+
+ // TODO add connections ?
+};
+
+/**
+ * @brief Method to do partitioning from module and PartitionTable to produce PartedModules
+ */
+PartedModules apply(Module *module, const PartitionTable &partition);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_H__
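For orientation only (this is not part of the change set above): a minimal sketch of how the interface declared in this header might be used, assuming the header is reachable as <luci/Partition.h>, that a luci::Module has already been loaded elsewhere, and that "CONV_2D" is a valid opcode name for a byopcodes entry.

// Sketch: partition an already-loaded module by opcode name.
#include <luci/Partition.h>

luci::PartedModules partition_example(luci::Module *module)
{
  luci::PartitionTable table;
  table.groups = {"cpu", "npu"};      // candidate groups for sub models
  table.default_group = "cpu";        // group for operators not assigned below
  table.byopcodes["CONV_2D"] = "npu"; // OPCODENAME=group style assignment

  // apply() returns the partitioned sub modules in PartedModules::pmodules
  return luci::apply(module, table);
}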
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOpCode.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <mio/circle/schema_generated.h>
+
+namespace
+{
+
+using namespace luci;
+using namespace circle;
+
+class QueryOpCode final : public CircleNodeVisitor<BuiltinOperator>
+{
+public:
+// NOTE only circle operators may have BuiltinOperator_XXX
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
+ BuiltinOperator visit(const CIRCLE_CLASS *) final { return BuiltinOperator_##OPCODE; }
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS)
+
+#include "luci/IR/CircleNodes.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+
+ // NOTE only builtin operators should be called (NOT virtual nodes)
+};
+
+class QueryCircleName final : public luci::CircleNodeVisitor<const char *>
+{
+public:
+// NOTE provide names for circle virtual nodes
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS)
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) \
+ const char *visit(const CIRCLE_CLASS *) final { return #OPCODE; }
+
+#include "luci/IR/CircleNodes.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+
+ // default is null
+ const char *visit(const luci::CircleNode *) final { return nullptr; }
+};
+
+} // namespace
+
+namespace luci
+{
+
+std::string opcode_name(const CircleNode *node)
+{
+ QueryCircleName qcn;
+ auto cname = node->accept(&qcn);
+ if (cname != nullptr)
+ return std::string(cname);
+
+ QueryOpCode qoc;
+ auto opcode = node->accept(&qoc);
+ auto name = circle::EnumNameBuiltinOperator(opcode);
+ return std::string(name);
+}
+
+} // namespace luci
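The CIRCLE_NODE / CIRCLE_VNODE pair above is an X-macro expansion over CircleNodes.lst. As an illustration only, assuming the list contains entries of the form CIRCLE_NODE(ADD, luci::CircleAdd) and CIRCLE_VNODE(CIRCLECONST, luci::CircleConst), the two visitors expand roughly to:

// In QueryOpCode: each CIRCLE_NODE entry becomes a visit() overload,
// while CIRCLE_VNODE entries expand to nothing.
BuiltinOperator visit(const luci::CircleAdd *) final { return BuiltinOperator_ADD; }

// In QueryCircleName the roles are swapped, so only virtual nodes get a name here:
const char *visit(const luci::CircleConst *) final { return "CIRCLECONST"; }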
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_CIRCLE_OP_CODE_H__
+#define __LUCI_PARTITION_CIRCLE_OP_CODE_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <string>
+
+namespace luci
+{
+
+std::string opcode_name(const CircleNode *node);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_CIRCLE_OP_CODE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOpCode.h"
+
+// NOTE any node will do for testing
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+TEST(CircleOpCodeTest, name)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleSqrt>();
+
+ auto name = luci::opcode_name(node);
+ ASSERT_EQ(name, "SQRT");
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext)
+{
+ ConnectNode cn(clonecontext);
+ node->accept(&cn);
+}
+
+luci::CircleNode *ConnectNode::find_clone(const luci::CircleNode *node)
+{
+ auto it = _clonecontext.find(node);
+ if (it == _clonecontext.end())
+ throw oops::UserExn("Invalid node in ConnectNode");
+ return it->second;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_CONNECT_NODE_H__
+#define __LUCI_PARTITION_CONNECT_NODE_H__
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <map>
+
+namespace luci
+{
+
+/**
+ * @note MapNode2Clone maps an original node to its cloned node and is used
+ *       to find the inputs of a cloned node
+ *
+ * (Original) (Clone)
+ *
+ * [A] [A']
+ * | [B] | [B']
+ * | | | |
+ * \ / \ /
+ * [C] [C']
+ *
+ * From the view of [C'] we need to find [A'] and [B']. Since we know [C] from
+ * [C'], we can take the inputs of [C], namely [A] and [B], and then look up
+ * [A]->[A'] and [B]->[B'] in the map (a short usage sketch follows this header).
+ */
+using MapNode2Clone = std::map<const CircleNode * /* ORG */, CircleNode * /* CLONE */>;
+
+struct CloneContext
+{
+ std::pair<MapNode2Clone::iterator, bool> emplace(const CircleNode *org, CircleNode *clone)
+ {
+ return node2clone.emplace(org, clone);
+ }
+ MapNode2Clone::iterator find(const CircleNode *org) { return node2clone.find(org); }
+ MapNode2Clone::iterator end(void) { return node2clone.end(); }
+
+ MapNode2Clone node2clone;
+};
+
+class ConnectNode final : public luci::CircleNodeVisitor<void>
+{
+public:
+  ConnectNode(luci::CloneContext &clonecontext) : _clonecontext(clonecontext) {}
+
+public:
+ // void visit(const luci::CircleAbs *) final;
+ void visit(const luci::CircleAdd *) final;
+ // void visit(const luci::CircleAddN *) final;
+ // void visit(const luci::CircleArgMax *) final;
+ // void visit(const luci::CircleArgMin *) final;
+ // void visit(const luci::CircleAveragePool2D *) final;
+ // void visit(const luci::CircleBatchMatMul *) final;
+ // void visit(const luci::CircleBatchToSpaceND *) final;
+ // void visit(const luci::CircleCast *) final;
+ // void visit(const luci::CircleCeil *) final;
+ // void visit(const luci::CircleConcatenation *) final;
+ void visit(const luci::CircleConst *) final;
+ // void visit(const luci::CircleConv2D *) final;
+ // void visit(const luci::CircleCos *) final;
+ // void visit(const luci::CircleCustom *) final;
+ // void visit(const luci::CircleDepthToSpace *) final;
+ // void visit(const luci::CircleDepthwiseConv2D *) final;
+ // void visit(const luci::CircleDequantize *) final;
+ void visit(const luci::CircleDiv *) final;
+ // void visit(const luci::CircleElu *) final;
+ // void visit(const luci::CircleEqual *) final;
+ // void visit(const luci::CircleExp *) final;
+ // void visit(const luci::CircleExpandDims *) final;
+ // void visit(const luci::CircleFakeQuant *) final;
+ // void visit(const luci::CircleFill *) final;
+ // void visit(const luci::CircleFloor *) final;
+ // void visit(const luci::CircleFloorDiv *) final;
+ // void visit(const luci::CircleFloorMod *) final;
+ // void visit(const luci::CircleFullyConnected *) final;
+ // void visit(const luci::CircleGather *) final;
+ // void visit(const luci::CircleGatherNd *) final;
+ // void visit(const luci::CircleGreater *) final;
+ // void visit(const luci::CircleGreaterEqual *) final;
+ // void visit(const luci::CircleIf *) final;
+ // void visit(const luci::CircleL2Normalize *) final;
+ // void visit(const luci::CircleL2Pool2D *) final;
+ // void visit(const luci::CircleLeakyRelu *) final;
+ // void visit(const luci::CircleLess *) final;
+ // void visit(const luci::CircleLessEqual *) final;
+ // void visit(const luci::CircleLocalResponseNormalization *) final;
+ // void visit(const luci::CircleLog *) final;
+ // void visit(const luci::CircleLogicalAnd *) final;
+ // void visit(const luci::CircleLogicalNot *) final;
+ // void visit(const luci::CircleLogicalOr *) final;
+ // void visit(const luci::CircleLogistic *) final;
+ // void visit(const luci::CircleLogSoftmax *) final;
+ // void visit(const luci::CircleMatrixDiag *) final;
+ // void visit(const luci::CircleMatrixSetDiag *) final;
+ // void visit(const luci::CircleMaximum *) final;
+ // void visit(const luci::CircleMaxPool2D *) final;
+ void visit(const luci::CircleMean *) final;
+ // void visit(const luci::CircleMinimum *) final;
+ // void visit(const luci::CircleMirrorPad *) final;
+ void visit(const luci::CircleMul *) final;
+ // void visit(const luci::CircleNeg *) final;
+ // void visit(const luci::CircleNonMaxSuppressionV4 *) final;
+ // void visit(const luci::CircleNonMaxSuppressionV5 *) final;
+ // void visit(const luci::CircleNotEqual *) final;
+ // void visit(const luci::CircleOneHot *) final;
+ // void visit(const luci::CirclePack *) final;
+ // void visit(const luci::CirclePad *) final;
+ // void visit(const luci::CirclePadV2 *) final;
+ void visit(const luci::CirclePow *) final;
+ // void visit(const luci::CirclePRelu *) final;
+ // void visit(const luci::CircleRange *) final;
+ // void visit(const luci::CircleRank *) final;
+ // void visit(const luci::CircleReduceAny *) final;
+ // void visit(const luci::CircleReduceMax *) final;
+ // void visit(const luci::CircleReduceMin *) final;
+ // void visit(const luci::CircleReduceProd *) final;
+ // void visit(const luci::CircleRelu *) final;
+ // void visit(const luci::CircleRelu6 *) final;
+ // void visit(const luci::CircleReluN1To1 *) final;
+ // void visit(const luci::CircleReshape *) final;
+ // void visit(const luci::CircleResizeBilinear *) final;
+ // void visit(const luci::CircleResizeNearestNeighbor *) final;
+ // void visit(const luci::CircleReverseSequence *) final;
+ // void visit(const luci::CircleReverseV2 *) final;
+ // void visit(const luci::CircleRound *) final;
+ void visit(const luci::CircleRsqrt *) final;
+ // void visit(const luci::CircleScatterNd *) final;
+ // void visit(const luci::CircleSegmentSum *) final;
+ // void visit(const luci::CircleSelect *) final;
+ // void visit(const luci::CircleSelectV2 *) final;
+ // void visit(const luci::CircleShape *) final;
+ // void visit(const luci::CircleSin *) final;
+ // void visit(const luci::CircleSlice *) final;
+ // void visit(const luci::CircleSoftmax *) final;
+ // void visit(const luci::CircleSpaceToBatchND *) final;
+ // void visit(const luci::CircleSpaceToDepth *) final;
+ // void visit(const luci::CircleSparseToDense *) final;
+ // void visit(const luci::CircleSplit *) final;
+ // void visit(const luci::CircleSplitV *) final;
+ void visit(const luci::CircleSqrt *) final;
+ // void visit(const luci::CircleSquare *) final;
+ void visit(const luci::CircleSquaredDifference *) final;
+ // void visit(const luci::CircleSqueeze *) final;
+ // void visit(const luci::CircleStridedSlice *) final;
+ void visit(const luci::CircleSub *) final;
+ // void visit(const luci::CircleSum *) final;
+ // void visit(const luci::CircleTanh *) final;
+ // void visit(const luci::CircleTile *) final;
+ // void visit(const luci::CircleTopKV2 *) final;
+ // void visit(const luci::CircleTranspose *) final;
+ // void visit(const luci::CircleTransposeConv *) final;
+ // void visit(const luci::CircleUnidirectionalSequenceLSTM *) final;
+ // void visit(const luci::CircleUnique *) final;
+ // void visit(const luci::CircleUnpack *) final;
+ // void visit(const luci::CircleWhere *) final;
+ // void visit(const luci::CircleWhile *) final;
+ // void visit(const luci::CircleZerosLike *) final;
+
+ // Circle Only
+ // void visit(const luci::CircleBCQFullyConnected *) final;
+ // void visit(const luci::CircleBCQGather *) final;
+ // void visit(const luci::CircleInstanceNorm *) final;
+
+ // Virtual
+ // void visit(const luci::CircleCustomOut *) final;
+ // void visit(const luci::CircleIfOut *) final;
+ // void visit(const luci::CircleInput *) final;
+ // void visit(const luci::CircleNonMaxSuppressionV4Out *) final;
+ // void visit(const luci::CircleNonMaxSuppressionV5Out *) final;
+ // void visit(const luci::CircleOutput *) final;
+ // void visit(const luci::CircleOutputDummy *) final;
+ // void visit(const luci::CircleOutputExclude *) final;
+ // void visit(const luci::CircleSplitOut *) final;
+ // void visit(const luci::CircleSplitVOut *) final;
+ // void visit(const luci::CircleTopKV2Out *) final;
+ // void visit(const luci::CircleUniqueOut *) final;
+ // void visit(const luci::CircleUnpackOut *) final;
+ // void visit(const luci::CircleWhileOut *) final;
+
+public:
+ luci::CircleNode *find_clone(const luci::CircleNode *node);
+
+protected:
+ luci::CloneContext &_clonecontext;
+};
+
+/**
+ * @brief Connect cloned node from input node
+ */
+void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_CONNECT_NODE_H__
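For orientation, a minimal sketch of the flow described in the @note above, with org_a/org_b/org_c and their clones as hypothetical placeholders; the ConnectNode tests later in this change are the authoritative usage.

// Sketch: after cloning, record each original->clone pair, then wire inputs.
luci::CloneContext ctx;
ctx.emplace(org_a, clone_a); // [A] -> [A']
ctx.emplace(org_b, clone_b); // [B] -> [B']
ctx.emplace(org_c, clone_c); // [C] -> [C']

// Visits org_c ([C]) and re-points clone_c's ([C']) inputs to [A'] and [B'].
luci::clone_connect(org_c, ctx);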
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.test.h"
+
+// This file validates "ConnectNode.test.h". Please DO NOT remove this file.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONNECT_NODE_TEST_H__
+#define __CONNECT_NODE_TEST_H__
+
+#include "ConnectNode.h"
+
+#include <luci/Service/CircleNodeClone.h>
+#include <luci/test/TestIOGraph.h>
+
+#include <loco/IR/Graph.h>
+
+#include <cassert>
+#include <initializer_list>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+namespace luci
+{
+namespace test
+{
+
+template <unsigned N> class TestIsOGraph : public TestIsGraphlet<N>, public TestOGraphlet
+{
+public:
+ TestIsOGraph() = default;
+
+public:
+ virtual void init(const std::initializer_list<ShapeU32> shape_in, const ShapeU32 shape_out)
+ {
+ if (shape_in.size() != N)
+ throw std::runtime_error("Failed to init TestIsOGraph");
+
+ TestIsGraphlet<N>::init(TestIsGraphlet<N>::g(), shape_in);
+ TestOGraphlet::init(TestIsGraphlet<N>::g(), shape_out);
+ }
+};
+
+template <class T> class NodeGraphletT
+{
+public:
+ virtual void init(loco::Graph *g)
+ {
+ _node = g->nodes()->create<T>();
+ _node->dtype(loco::DataType::S32);
+ _node->name("node");
+ }
+
+ T *node(void) const { return _node; }
+
+protected:
+ T *_node{nullptr};
+};
+
+template <class T> class NodeIsGraphletT
+{
+public:
+ virtual void init(loco::Graph *g, uint32_t n)
+ {
+ _node = g->nodes()->create<T>(n);
+ _node->dtype(loco::DataType::S32);
+ _node->name("node");
+ }
+
+ T *node(void) const { return _node; }
+
+protected:
+ T *_node{nullptr};
+};
+
+/**
+ * @brief ConnectionTestHelper provides a common framework for testing
+ *        cloned CircleNode connections
+ */
+class ConnectionTestHelper
+{
+public:
+ ConnectionTestHelper() { _graph_clone = loco::make_graph(); }
+
+public:
+ template <unsigned N> void prepare_inputs(TestIsOGraph<N> *isograph)
+ {
+ assert(N == isograph->num_inputs());
+
+ for (uint32_t i = 0; i < N; ++i)
+ {
+ auto *input = _graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(isograph->input(i), input);
+ _clonectx.emplace(isograph->input(i), input);
+ _inputs.push_back(input);
+ }
+ }
+
+ /**
+ * @note prepare_inputs_miss is for negative testing
+ */
+ template <unsigned N> void prepare_inputs_miss(TestIsOGraph<N> *isograph)
+ {
+ assert(N == isograph->num_inputs());
+
+ for (uint32_t i = 0; i < N; ++i)
+ {
+ auto *input = _graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(isograph->input(i), input);
+ if (i != 0)
+ _clonectx.emplace(isograph->input(i), input);
+ _inputs.push_back(input);
+ }
+ }
+
+ void clone_connect(luci::CircleNode *node, luci::CircleNode *clone)
+ {
+ _clonectx.emplace(node, clone);
+
+ luci::clone_connect(node, _clonectx);
+ }
+
+public:
+ loco::Graph *graph_clone(void) { return _graph_clone.get(); }
+
+ luci::CircleNode *inputs(uint32_t idx) { return _inputs.at(idx); }
+
+protected:
+ luci::CloneContext _clonectx;
+ std::vector<luci::CircleInput *> _inputs;
+ std::unique_ptr<loco::Graph> _graph_clone; // graph for clones
+};
+
+} // namespace test
+} // namespace luci
+
+#endif // __CONNECT_NODE_TEST_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleAdd *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleAdd *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleAdd *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleAdd>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleAdd>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Add)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Add_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleConst *)
+{
+ // Nothing to do
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleDiv *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleDiv *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleDiv *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleDiv>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleDiv>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Div)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Div_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMean *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMean *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *reduction_indices =
+ loco::must_cast<luci::CircleNode *>(node->reduction_indices());
+
+ cloned->input(cn->find_clone(input));
+ cloned->reduction_indices(cn->find_clone(reduction_indices));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMean *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMul *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMul *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMul *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleMul>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+  void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleMul>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Mul)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Mul_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CirclePow *node)
+{
+ auto *cloned = loco::must_cast<luci::CirclePow *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CirclePow *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleRsqrt *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleRsqrt *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleRsqrt *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSqrt *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSqrt *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSqrt *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSquaredDifference *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSquaredDifference *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSquaredDifference *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSub *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSub *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSub *node) { connect(this, node); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSub>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ NodeGraphletT<luci::CircleSub>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Sub)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Sub_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIR.h"
+#include "PartitionIRDump.h"
+#include "PartitionPGroups.h"
+#include "PartitionMerge.h"
+#include "PartitionCleanup.h"
+#include "PartitionPModules.h"
+#include "PartitionPModulesDump.h"
+
+#include "luci/Partition.h"
+#include "luci/Log.h"
+
+#include <cassert>
+
+namespace luci
+{
+
+/**
+ * @brief This will return a Partitioned Modules (PartedModules) object
+ */
+PartedModules apply(Module *source, const PartitionTable &partition)
+{
+ assert(source != nullptr);
+
+ LOGGER(l);
+
+ auto pgroups = produce_pgroups(source, partition);
+ INFO(l) << "--- Partition Graph (1)------------------------";
+ INFO(l) << pgroups.get();
+
+ auto mpgroups = merge_pgroups(pgroups.get());
+ INFO(l) << "--- Partition Graph (2)------------------------";
+ INFO(l) << mpgroups.get();
+
+ remove_unused_inputoutputs(mpgroups.get(), source);
+ INFO(l) << "--- Partition Graph (3)------------------------";
+ INFO(l) << mpgroups.get();
+
+ auto pmodules = produce_pmodules(mpgroups.get());
+ INFO(l) << "--- Modules -----------------------------------";
+ INFO(l) << &pmodules;
+
+ return pmodules;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Partition.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+} // namespace
+
+TEST(PartitionTest, simple_apply)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ luci::PartitionTable pt;
+ pt.default_group = "A";
+
+ auto pms = apply(&module, pt);
+
+ ASSERT_EQ(1, pms.pmodules.size());
+
+ auto &pm = *pms.pmodules.begin();
+ ASSERT_NE(nullptr, pm.module->graph());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionCleanup.h"
+
+#include "luci/Log.h"
+
+namespace
+{
+
+using CircleNodes = std::vector<luci::CircleNode *>;
+
+/**
+ * @note Outputs of the original source graph are treated as used outputs
+ */
+void gather_graph_outputs(CircleNodes &nodes, const luci::Module *source)
+{
+ // graph outputs are treated as used
+ auto graph = source->graph();
+ for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
+ {
+ auto output = luci::output_node(graph, n); // output is CircleOutput
+ assert(output != nullptr);
+
+ auto node = loco::must_cast<luci::CircleNode *>(output->from());
+
+ nodes.push_back(node);
+ }
+
+ // TODO add unused virtual outputs
+}
+
+/**
+ * @note If one PGroup requires an input, that input should be an output
+ * from another PGroup
+ */
+void gather_pgroups_outputs(CircleNodes &nodes, const luci::PGroups *pgroups)
+{
+ // input of a pgroup is used output
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ for (auto input : pgroup->inputs)
+ {
+ nodes.push_back(input);
+ }
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+void remove_unused_inputoutputs(luci::PGroups *pgroups, const luci::Module *source)
+{
+ assert(source != nullptr);
+ assert(pgroups != nullptr);
+
+ LOGGER(l);
+
+ // TODO support multiple subgraph
+ assert(source->size() == 1);
+
+ INFO(l) << "--- Cleanup unused inputs/outputs";
+
+ // remove input within same pgroup
+ for (auto &pgroup : pgroups->pgroups)
+ {
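+    // NOTE erasing from pgroup->inputs invalidates the iterator; each removal
+    //      breaks out of the scan and the do/while restarts it until a full
+    //      pass makes no change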
+ bool changed;
+ do
+ {
+ changed = false;
+ for (auto it = pgroup->inputs.begin(); it != pgroup->inputs.end(); ++it)
+ {
+ auto input = *it;
+ if (pgroups->pgroup_of(input) == pgroup.get())
+ {
+ INFO(l) << " Cleanup input " << input->name() << " from group " << pgroup->group;
+ pgroup->inputs.erase(it);
+ changed = true;
+ break;
+ }
+        // NOTE CircleConst is one of the input types, as it is registered as
+        // an input to some node and then (should be) merged.
+        // Remove this input if it is a CircleConst
+ if (dynamic_cast<CircleConst *>(input) != nullptr)
+ {
+ INFO(l) << " Cleanup CircleConst " << input->name() << " from group " << pgroup->group;
+ pgroup->inputs.erase(it);
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+ }
+
+ // remove unused output(s)
+ // 'used_outputs' will hold actual used outputs for all PGroups
+ CircleNodes used_outputs;
+
+ gather_graph_outputs(used_outputs, source);
+ gather_pgroups_outputs(used_outputs, pgroups);
+
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ bool changed;
+ do
+ {
+ changed = false;
+ for (auto it = pgroup->outputs.begin(); it != pgroup->outputs.end(); ++it)
+ {
+ auto output = *it;
+ auto oit = std::find(used_outputs.begin(), used_outputs.end(), output);
+ if (oit == used_outputs.end())
+ {
+ INFO(l) << " Cleanup output " << output->name() << " from group " << pgroup->group;
+ pgroup->outputs.erase(it);
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+ }
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITON_CLEANUP_H__
+#define __LUCI_PARTITON_CLEANUP_H__
+
+#include "PartitionIR.h"
+
+#include <luci/IR/Module.h>
+
+namespace luci
+{
+
+/**
+ * @brief This will remove unused inputs/outputs in each pgroup of pgroups
+ */
+void remove_unused_inputoutputs(luci::PGroups *, const luci::Module *);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITON_CLEANUP_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIR.h"
+#include "CircleOpCode.h"
+
+#include "luci/Log.h"
+
+#include <cassert>
+#include <ostream>
+#include <iostream>
+
+namespace luci
+{
+
+std::unique_ptr<PGroups> PGroups::make_copy(void) const
+{
+ auto d_pgroups = std::make_unique<luci::PGroups>();
+
+ for (auto &s_pgroup : pgroups)
+ {
+ // make a copy of s_pgroup to d_pgroup
+ std::unique_ptr<luci::PGroup> d_pgroup = std::make_unique<luci::PGroup>();
+
+ d_pgroup->group = s_pgroup->group;
+ d_pgroup->id = s_pgroup->id;
+
+ for (auto &pnode : s_pgroup->pnodes)
+ {
+ auto pnodec = std::make_unique<luci::PNode>();
+ pnodec->node = pnode->node;
+ pnodec->group = pnode->group;
+ pnodec->pgroup = d_pgroup.get();
+ d_pgroup->pnodes.push_back(std::move(pnodec));
+ }
+
+ for (auto &input : s_pgroup->inputs)
+ d_pgroup->inputs.push_back(input);
+
+ for (auto &output : s_pgroup->outputs)
+ d_pgroup->outputs.push_back(output);
+
+ // copy node2group
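+    // NOTE the whole map is copied once per source pgroup; the entries are the
+    //      same each time, so the repetition is redundant but harmless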
+ for (auto it = node2group.begin(); it != node2group.end(); ++it)
+ d_pgroups->node2group[it->first] = it->second;
+
+ // build id2pgroup
+ d_pgroups->id2pgroup[d_pgroup->id] = d_pgroup.get();
+
+ d_pgroups->pgroups.push_back(std::move(d_pgroup));
+ // note: d_pgroup is now nullptr as it's moved
+ }
+
+ return std::move(d_pgroups);
+}
+
+std::string PGroups::group_of(luci::CircleNode *node) const
+{
+ assert(node != nullptr);
+
+ LOGGER(l);
+
+ auto it = node2group.find(node);
+ if (it == node2group.end())
+ {
+ INFO(l) << "PGroups::group_of " << node << "(" << node->name() << ") not found" << std::endl;
+ return "";
+ }
+ return it->second;
+}
+
+const PGroup *PGroups::pgroup_of(luci::CircleNode *node) const
+{
+ assert(node != nullptr);
+
+ for (auto &pgroup : pgroups)
+ {
+ for (auto &pnode : pgroup->pnodes)
+ {
+ if (node == pnode->node)
+ return pgroup.get();
+ }
+ }
+  // node may be a graph input (CircleInput)
+ return nullptr;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_IR_H__
+#define __LUCI_PARTITION_IR_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+struct PGroup;
+
+/**
+ * @brief Partition Node with CircleNode with group name
+ * @note node just points to source luci::CircleNode, NOT the cloned node
+ * CloneContext is used to find cloned node from source node
+ */
+struct PNode
+{
+ const luci::CircleNode *node = nullptr;
+ std::string group;
+
+ const PGroup *pgroup = nullptr;
+};
+
+/**
+ * @brief Partition Group with Partition Nodes of same group and I/Os nodes
+ */
+struct PGroup
+{
+ std::vector<std::unique_ptr<PNode>> pnodes;
+ std::string group;
+ uint32_t id = 0;
+
+ // I/O while partitioning
+ std::vector<luci::CircleNode *> inputs;
+ std::vector<luci::CircleNode *> outputs;
+};
+
+struct PGroups
+{
+ std::vector<std::unique_ptr<PGroup>> pgroups;
+
+ // node2group is to find group key from source node
+ std::map<const luci::CircleNode *, std::string> node2group;
+
+  // id2pgroup is to find *pgroup from pgroup id
+ std::map<uint32_t, PGroup *> id2pgroup;
+
+ // default group key for reference
+ std::string default_group;
+
+public:
+ /**
+ * @brief return a copy of PGroups
+ */
+ std::unique_ptr<PGroups> make_copy(void) const;
+
+ /**
+ * @brief return group key of node, empty string if not found
+ */
+ std::string group_of(luci::CircleNode *node) const;
+
+ /**
+ * @brief return holding pgroup of node, nullptr if not found
+ */
+ const PGroup *pgroup_of(luci::CircleNode *node) const;
+};
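+
+// Ownership summary: PGroups owns its PGroup objects and each PGroup owns its
+// PNode objects; node2group and id2pgroup are non-owning lookup tables, and
+// PNode::node points to the source CircleNode without owning it.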
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_IR_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIR.h"
+
+// NOTE any node will do for testing
+#include <luci/IR/Nodes/CircleAdd.h>
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+TEST(PartitionIRTest, PNode_ctor)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ luci::PNode pnode;
+ pnode.node = node;
+
+ ASSERT_NE(nullptr, pnode.node);
+ ASSERT_EQ(nullptr, pnode.pgroup);
+}
+
+// TODO add more tests with luci::PNode
+
+TEST(PartitionIRTest, PGroup_ctor)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ luci::PGroup pgroup;
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = node;
+
+ pgroup.pnodes.push_back(std::move(pnode));
+
+ ASSERT_NE(pgroup.pnodes.end(), pgroup.pnodes.begin());
+ ASSERT_EQ(0, pgroup.inputs.size());
+ ASSERT_EQ(0, pgroup.outputs.size());
+}
+
+// TODO add more tests with luci::PGroup
+
+TEST(PartitionIRTest, PGroups_ctor)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = node;
+
+ auto pgroup = std::make_unique<luci::PGroup>();
+ pgroup->pnodes.push_back(std::move(pnode));
+
+ luci::PGroups pgroups;
+ pgroups.pgroups.push_back(std::move(pgroup));
+
+ ASSERT_NE(pgroups.pgroups.end(), pgroups.pgroups.begin());
+}
+
+// TODO add more tests with luci::PGroups
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIRDump.h"
+
+#include "CircleOpCode.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PNode *pnode)
+{
+ os << "PNode: " << pnode->group << ", " << pnode->node << ":" << luci::opcode_name(pnode->node)
+ << ":" << pnode->node->name() << std::endl;
+}
+
+void dump(std::ostream &os, const PGroup *pgroup)
+{
+ os << "--- PGroup: " << pgroup->group << std::endl;
+ os << "Input(s): ";
+ for (auto &node_in : pgroup->inputs)
+ os << node_in->name() << " ";
+ os << std::endl;
+ for (auto &pnode : pgroup->pnodes)
+ {
+ dump(os, pnode.get());
+ }
+ os << "Output(s): ";
+ for (auto &node_out : pgroup->outputs)
+ os << node_out->name() << " ";
+ os << std::endl;
+}
+
+void dump(std::ostream &os, const PGroups *pgroups)
+{
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ dump(os, pgroup.get());
+ }
+ os << "--- Node2Group items: " << std::endl;
+ for (auto it = pgroups->node2group.begin(); it != pgroups->node2group.end(); ++it)
+ {
+ auto node = it->first;
+ auto group = it->second;
+ os << " Node: " << node << "(" << node->name() << "): " << group << std::endl;
+ }
+}
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PGroups *pgroups)
+{
+ luci::dump(os, pgroups);
+ return os;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_IR_DUMP_H__
+#define __LUCI_PARTITION_IR_DUMP_H__
+
+#include "PartitionIR.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PNode *pnode);
+void dump(std::ostream &os, const PGroup *pgroup);
+void dump(std::ostream &os, const PGroups *pgroups);
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PGroups *pgroups);
+
+#endif // __LUCI_PARTITION_IR_DUMP_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionMerge.h"
+
+#include <algorithm>
+
+namespace
+{
+
+/**
+ * @brief return true if pgroup_i output is one of the inputs of pgroup
+ */
+bool is_input_of(const luci::PGroup *pgroup_i, const luci::PGroup *pgroup)
+{
+ for (auto *output : pgroup_i->outputs)
+ {
+ for (auto *input : pgroup->inputs)
+ {
+ if (input == output)
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * @brief return true if there is only one input or all the inputs have same group
+ * @note pgroups is used to find group of pgroup
+ */
+bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
+{
+ assert(pgroups != nullptr);
+ assert(pgroup != nullptr);
+
+ const luci::PGroup *input_pgroup = nullptr;
+ std::string group;
+ for (auto &input : pgroup->inputs)
+ {
+ auto input_group = pgroups->group_of(input);
+    // NOTE: all the nodes should be registered and the returned group should be valid.
+    // produce_pgroups() should ensure this.
+    // assert here to find if there is any problem with this.
+ assert(not input_group.empty());
+ if (input_group.empty())
+ input_group = pgroups->default_group;
+
+ if (group.empty())
+ group = input_group;
+ else
+ {
+ if (group != input_group)
+ return false;
+ }
+ // if there are multiple inputs, all the inputs should be in same pgroup
+ // https://github.com/Samsung/ONE/issues/6230#issuecomment-801618150
+ // https://github.com/Samsung/ONE/issues/6230#issuecomment-801680531
+ auto pgroup_input = pgroups->pgroup_of(input);
+ if (pgroup_input != nullptr)
+ {
+ if (input_pgroup == nullptr)
+ input_pgroup = pgroup_input;
+ else
+ {
+ if (input_pgroup != pgroup_input)
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/**
+ * @brief merge pgroup into pgroup_i
+ * @note output of pgroup_i should be input of pgroup
+ */
+void merge_into(luci::PGroup *pgroup, luci::PGroup *pgroup_i)
+{
+ for (auto &pnode : pgroup->pnodes)
+ {
+ // update pgroup for this pnode
+ pnode->pgroup = pgroup_i;
+ assert(pnode->group == pgroup_i->group);
+
+ // we don't need to add this in topological order:
+    // all the nodes will be created first, then connections will be made
+ pgroup_i->pnodes.push_back(std::move(pnode));
+ // note: pnode is now nullptr as it's moved into pgroup_i->pnodes
+ }
+
+ for (auto &input : pgroup->inputs)
+ {
+ // add inputs of pgroup to pgroup_i if not member of pgroup_i
+ bool found_in_pgroup_i = false;
+ for (auto &pnode : pgroup_i->pnodes)
+ {
+ if (input == pnode->node)
+ {
+ found_in_pgroup_i = true;
+ break;
+ }
+ }
+ // skip if this input is already in the inputs
+ auto fit = std::find(pgroup_i->inputs.begin(), pgroup_i->inputs.end(), input);
+ if (fit != pgroup_i->inputs.end())
+ {
+ found_in_pgroup_i = true;
+ }
+    // note: if we force found_in_pgroup_i to false for testing, there will be
+    // unnecessary inputs
+ if (not found_in_pgroup_i)
+ {
+      // this node input may be in another pgroup
+ pgroup_i->inputs.push_back(input);
+ }
+ }
+ // add outputs of pgroup to pgroup_i outputs if not exist
+ for (auto &output : pgroup->outputs)
+ {
+ auto it = std::find(pgroup_i->outputs.begin(), pgroup_i->outputs.end(), output);
+ if (it == pgroup_i->outputs.end())
+ {
+ pgroup_i->outputs.push_back(output);
+ }
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * @brief This will merge pgroups with same group values in topological order
+ */
+std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups)
+{
+ // Make a copy of pgroups to apply merge action
+ // Q) do we really need a copy?
+ auto d_pgroups = s_pgroups->make_copy();
+
+ // Merge partition graphs
+  // - This is an initial implementation that works for limited networks
+  // - if A and B are in the same group and A is an input of B -> merge B into A
+ auto &pgroups = d_pgroups->pgroups;
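+  // NOTE a successful merge erases an element from 'pgroups' and invalidates
+  //      both loop iterators, so we break out of both loops and let the
+  //      do/while restart the scan until no more merge happens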
+ bool changed;
+ do
+ {
+ changed = false;
+ for (auto &pgroup_i : pgroups)
+ {
+ bool merged = false;
+ for (auto it = pgroups.begin(); it != pgroups.end(); ++it)
+ {
+ auto &pgroup = *it;
+
+ // skip if same object
+ if (pgroup->id == pgroup_i->id)
+ continue;
+ // skip if different group
+ if (pgroup->group != pgroup_i->group)
+ continue;
+ // skip if not connected
+ if (!is_input_of(pgroup_i.get(), pgroup.get()))
+ continue;
+ // skip if there are multiple inputs but inputs differ in group
+ if (!is_input_same(pgroup.get(), d_pgroups.get()))
+ continue;
+        // TODO add more conditions as needed
+
+ merge_into(pgroup.get(), pgroup_i.get());
+
+ auto eit = d_pgroups->id2pgroup.find(pgroup->id);
+ assert(eit != d_pgroups->id2pgroup.end());
+ d_pgroups->id2pgroup.erase(eit);
+
+ // remove merged pgroup from pgroups
+ pgroups.erase(it);
+
+ merged = true;
+ break;
+ }
+ if (merged)
+ {
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+
+ return std::move(d_pgroups);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITON_MERGE_H__
+#define __LUCI_PARTITON_MERGE_H__
+
+#include "PartitionIR.h"
+
+#include <memory>
+
+namespace luci
+{
+
+std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITON_MERGE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPGroups.h"
+#include "PartitionIR.h"
+#include "CircleOpCode.h"
+
+#include "luci/Partition.h"
+#include "luci/Log.h"
+#include "luci/LogHelper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <loco.h>
+
+namespace
+{
+
+class IsVirtualNode final : public luci::CircleNodeVisitor<bool>
+{
+public:
+ bool visit(const luci::CircleInput *) final { return true; }
+ bool visit(const luci::CircleOutput *) final { return true; }
+ // TODO add all virtual nodes
+
+ // default is false
+ bool visit(const luci::CircleNode *) final { return false; }
+};
+
+bool check_allocate_partition(const luci::CircleNode *node)
+{
+ IsVirtualNode query;
+ if (node->accept(&query))
+ return false;
+ /**
+ * @note About CircleConst
+   * CircleConst acts like a part of some CircleNode, and managing a CircleConst
+   * used (referenced) multiple times is a bit difficult if it is used across
+   * different PGroups. So we treat it differently from other node types.
+ * https://github.com/Samsung/ONE/issues/6230#issuecomment-809802813
+ */
+ if (dynamic_cast<const luci::CircleConst *>(node) != nullptr)
+ return false;
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
+ const luci::PartitionTable &partition)
+{
+ assert(source != nullptr);
+ // TODO support multiple subgraphs
+ assert(source->size() == 1);
+
+ LOGGER(l);
+
+ auto pgroups = std::make_unique<luci::PGroups>();
+
+ pgroups->default_group = partition.default_group;
+
+ // Create a PGroup per CircleNode: each PGroup will have one CircleNode
+ auto graph = source->graph();
+ auto nodes = graph->nodes();
+ for (uint32_t idx = 0; idx < nodes->size(); ++idx)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(nodes->at(idx));
+
+    // check if node is a normal node that we are interested in
+ if (check_allocate_partition(node))
+ {
+ auto opcodename = luci::opcode_name(node);
+ assert(!opcodename.empty());
+
+ auto group = partition.default_group;
+ auto it = partition.byopcodes.find(opcodename);
+ if (it != partition.byopcodes.end())
+ group = it->second;
+
+ INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group
+ << std::endl;
+
+ auto pgroup = std::make_unique<luci::PGroup>();
+ pgroup->group = group;
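+      // NOTE ids start from 1 (idx + 1) and are used as the key of id2pgroup below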
+ pgroup->id = idx + 1;
+
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = node;
+ pnode->group = group;
+ pnode->pgroup = pgroup.get();
+
+ pgroup->pnodes.push_back(std::move(pnode));
+
+ // Set input of PGroup
+ for (uint32_t in = 0; in < node->arity(); ++in)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(node->arg(in));
+        // this input may be a CircleInput in the source graph
+ // --> not confident this is safe
+ pgroup->inputs.push_back(input);
+ }
+ // Set output of PGroup: node itself or multiple virtual outputs
+ // TODO support multiple virtual outputs
+ pgroup->outputs.push_back(node);
+
+ pgroups->node2group[node] = group;
+ pgroups->id2pgroup[pgroup->id] = pgroup.get();
+
+ pgroups->pgroups.push_back(std::move(pgroup));
+ }
+ else
+ {
+ INFO(l) << "Skip Op: " << node->name() << std::endl;
+ // record as default group
+ pgroups->node2group[node] = partition.default_group;
+ }
+ }
+
+ return std::move(pgroups);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITON_PGROUPS_H__
+#define __LUCI_PARTITON_PGROUPS_H__
+
+#include "PartitionIR.h"
+
+#include "luci/Partition.h"
+
+#include <luci/IR/Module.h>
+
+namespace luci
+{
+
+/**
+ * @brief This will produce a PGroups from Module and PartitionTable.
+ * @note Each PGroup will hold one CircleNode with the partition key value as its group.
+ *       Supports only a single Graph in the Module for now.
+ */
+std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
+ const luci::PartitionTable &partition);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITON_PGROUPS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPGroups.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+} // namespace
+
+TEST(PartitionPGroupsTest, simple_produce)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ luci::PartitionTable pt;
+ pt.default_group = "A";
+
+ auto pgs = produce_pgroups(&module, pt);
+
+ ASSERT_EQ(1, pgs->pgroups.size());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPModules.h"
+#include "ConnectNode.h"
+
+#include "luci/Service/CircleNodeClone.h"
+#include "luci/Log.h"
+
+#include <loco.h>
+
+namespace
+{
+
+void add_graph_input(loco::Graph *graph, luci::CircleInput *input_node)
+{
+ assert(graph != nullptr);
+ assert(input_node != nullptr);
+
+ auto graph_input = graph->inputs()->create();
+ graph_input->name(input_node->name());
+
+ // Set GraphInputOutputIndex for graph
+ input_node->index(graph_input->index());
+
+ // Data type
+ graph_input->dtype(input_node->dtype());
+
+ // Shape of GraphInput
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ input_shape->rank(input_node->rank());
+ for (uint32_t r = 0; r < input_node->rank(); ++r)
+ {
+ if (input_node->dim(r).known())
+ input_shape->dim(r).set(input_node->dim(r).value());
+ }
+ graph_input->shape(std::move(input_shape));
+}
+
+void add_graph_output(loco::Graph *graph, luci::CircleOutput *output_node)
+{
+ assert(graph != nullptr);
+ assert(output_node != nullptr);
+
+ auto graph_output = graph->outputs()->create();
+ graph_output->name(output_node->name());
+
+ // Set GraphInputOutputIndex for graph
+ output_node->index(graph_output->index());
+
+ // Data type
+ graph_output->dtype(output_node->dtype());
+
+ // Shape of GraphOutput
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ output_shape->rank(output_node->rank());
+ for (uint32_t r = 0; r < output_node->rank(); ++r)
+ {
+ if (output_node->dim(r).known())
+ output_shape->dim(r).set(output_node->dim(r).value());
+ }
+ graph_output->shape(std::move(output_shape));
+}
+
+/**
+ * @brief Build the given loco::Graph 'graph' from the nodes of 'pgroup'
+ */
+void build_graph(loco::Graph *graph, const luci::PGroup *pgroup)
+{
+ LOGGER(l);
+
+ luci::CloneContext clonectx;
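+
+  // Build order: graph inputs, then CircleConst inputs, then node clones,
+  // then connections between the clones, and finally graph outputs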
+
+ // add input node(s)
+ for (auto *input : pgroup->inputs)
+ {
+ auto *input_clone = graph->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(input, input_clone);
+
+ add_graph_input(graph, input_clone);
+ clonectx.emplace(input, input_clone);
+
+ INFO(l) << "MAP: "
+ << " input(" << input << ") -> " << input_clone << "(" << input_clone->name() << ")";
+ }
+
+ // add CircleConst for inputs
+ for (auto &pnode : pgroup->pnodes)
+ {
+ auto node = pnode->node;
+ uint32_t arity = node->arity();
+ for (uint32_t a = 0; a < arity; ++a)
+ {
+ auto in_a_const = dynamic_cast<luci::CircleConst *>(node->arg(a));
+ if (in_a_const != nullptr)
+ {
+ auto it = clonectx.find(in_a_const);
+ if (it == clonectx.end())
+ {
+ auto *clone = clone_node(in_a_const, graph);
+ clonectx.emplace(in_a_const, clone);
+
+ INFO(l) << "MAP: "
+ << " const(" << in_a_const << ") -> " << clone << "(" << clone->name() << ")";
+ }
+ }
+ }
+ }
+
+ // add nodes
+ for (auto &pnode : pgroup->pnodes)
+ {
+ auto *clone = clone_node(pnode->node, graph);
+ clonectx.emplace(pnode->node, clone);
+
+ INFO(l) << "MAP: "
+ << " node(" << pnode->node << ") -> " << clone << "(" << clone->name() << ")";
+ }
+ // connect nodes
+ for (auto &pnode : pgroup->pnodes)
+ {
+ clone_connect(pnode->node, clonectx);
+ }
+
+ // add output node(s)
+ for (auto *output : pgroup->outputs)
+ {
+ auto *output_clone = graph->nodes()->create<luci::CircleOutput>();
+ luci::copy_common_attributes(output, output_clone);
+ // note: we don't add output_clone to clonectx.
+ // logically, output is not used as an input to any other nodes.
+
+ auto it = clonectx.find(output);
+ assert(it != clonectx.end());
+ output_clone->from(it->second);
+
+ add_graph_output(graph, output_clone);
+
+ INFO(l) << "MAP: "
+ << "output(" << output << ") -> " << output_clone << "(" << output_clone->name() << ")"
+ << ": from " << it->second << "(" << it->second->name() << ")";
+ }
+}
+
+std::string make_name(const luci::PGroup *pgroup)
+{
+ auto &first_pnode = *pgroup->pnodes.begin();
+ auto *first_node = first_pnode->node;
+ std::string name = first_node->graph()->name();
+ name = name + "_" + pgroup->group;
+ return name;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * @brief This will produce a list of luci::Module, wrapped as PartedModules, from pgroups
+ */
+luci::PartedModules produce_pmodules(const luci::PGroups *pgroups)
+{
+ LOGGER(l);
+
+ luci::PartedModules pms;
+
+ for (auto &pgroup : pgroups->pgroups)
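+  // one PartedModule, holding a Module with a single Graph, is produced per PGroup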
+ {
+ luci::PartedModule pm;
+ pm.module = std::make_unique<luci::Module>();
+ pm.group = pgroup->group;
+
+ auto graph = loco::make_graph();
+
+ auto graph_name = make_name(pgroup.get());
+ graph->name(graph_name);
+
+ INFO(l) << "--- Partition Graph build----------------------";
+ INFO(l) << "--- name: " << graph_name;
+ build_graph(graph.get(), pgroup.get());
+
+ pm.module->add(std::move(graph));
+ pms.pmodules.emplace_back(std::move(pm));
+ }
+
+ return pms;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITON_PMODULES_H__
+#define __LUCI_PARTITON_PMODULES_H__
+
+#include "PartitionIR.h"
+
+#include "luci/Partition.h"
+
+namespace luci
+{
+
+luci::PartedModules produce_pmodules(const luci::PGroups *pgroups);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITON_PMODULES_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPModules.h"
+#include "PartitionPGroups.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+} // namespace
+
+TEST(PartitionPModulesTest, simple_convert)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ luci::PartitionTable pt;
+ pt.default_group = "A";
+
+ auto pgs = produce_pgroups(&module, pt);
+ auto pms = produce_pmodules(pgs.get());
+
+ ASSERT_EQ(1, pms.pmodules.size());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPModulesDump.h"
+
+#include "luci/LogHelper.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PartedModule *pmodule)
+{
+ os << "--- PartedModule: " << pmodule->group << std::endl;
+ os << luci::fmt(pmodule->module->graph());
+}
+
+void dump(std::ostream &os, const PartedModules *pmodules)
+{
+ for (auto &pmodule : pmodules->pmodules)
+ {
+ dump(os, &pmodule);
+ }
+ os << std::endl;
+}
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PartedModules *pmodules)
+{
+ luci::dump(os, pmodules);
+ return os;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_PMODULES_DUMP_H__
+#define __LUCI_PARTITION_PMODULES_DUMP_H__
+
+#include "luci/Partition.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PartedModule *pmodule);
+void dump(std::ostream &os, const PartedModules *pmodules);
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PartedModules *pmodules);
+
+#endif // __LUCI_PARTITION_PMODULES_DUMP_H__
target_link_libraries(luci_pass PRIVATE luci_log)
target_link_libraries(luci_pass PRIVATE luci_service)
target_link_libraries(luci_pass PRIVATE luci_logex)
+target_link_libraries(luci_pass PRIVATE luci_profile)
target_link_libraries(luci_pass PRIVATE nncc_common)
target_link_libraries(luci_pass PRIVATE oops)
install(TARGETS luci_pass DESTINATION lib)
target_include_directories(luci_pass_test PRIVATE src)
target_link_libraries(luci_pass_test luci_pass)
target_link_libraries(luci_pass_test luci_lang)
+target_link_libraries(luci_pass_test luci_testhelper)
#target_link_libraries(luci_pass_test oops)
enum Algorithm
{
FuseAddWithTConv,
+ FuseBatchNormWithConv,
+ FuseBatchNormWithDwConv,
FuseBatchNormWithTConv,
FuseBCQ,
FuseInstanceNorm,
QuantizeDequantizeWeights,
QuantizeWithMinMax,
Requantize,
+ FoldAddV2,
+ FoldCast,
FoldDequantize,
+ FoldSparseToDense,
+ ForwardReshapeToUnaryOp,
SparsifyTensorPass,
FusePreActivationBatchNorm,
MakeBatchNormGammaPositive,
RemoveRedundantTranspose,
ReplaceMulAddWithDepthwiseConv,
SubstitutePackToReshape,
+ SubstituteSqueezeToReshape,
+ ConvertNCHWToNHWC,
+ RemoveUnnecessarySlice,
+ RemoveUnnecessaryStridedSlice,
+ RemoveUnnecessarySplit,
+ RemoveUnnecessaryReshape,
+ TransformMinMaxToRelu6Pass,
+ SubstituteTransposeToReshape,
+ RemoveRedundantReshape,
};
enum AlgorithmParameters
Sparsify_format,
Sparsify_block_size,
Sparsify_block_map,
+
+ // convert NCHW to NHWC
+ NCHW_to_NHWC_preserve_input_shape,
+ NCHW_to_NHWC_preserve_output_shape,
};
virtual ~Options() = default;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_PASS_H__
+#define __LUCI_CIRCLE_SHAPE_INFERENCE_PASS_H__
+
+#include <loco.h>
+
+#include <luci/ModulePass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to infer shape of circle nodes
+ */
+class CircleShapeInferencePass : public luci::Pass
+{
+public:
+ virtual const char *name(void) const { return "luci::CircleShapeInferencePass"; }
+
+public:
+ bool run(luci::Module *m);
+ bool run(loco::Graph *graph);
+};
+
+} // namespace luci
+
+#endif //__LUCI_CIRCLE_SHAPE_INFERENCE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CONVERT_NCHW_TO_NHWC_PASS_H__
+#define __LUCI_CONVERT_NCHW_TO_NHWC_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to convert NCHW Ops to NHWC
+ *
+ * @details Find operators that use NCHW layout and make them use NHWC.
+ * Strictly speaking, it is impossible to distinguish whether
+ * an operator is using NCHW or NHWC without programmers' annotations.
+ * But we guess the data layout of each operator as much as possible
+ * based on the assumptions described in the comments.
+ * Note that this Pass does not change the execution result even
+ * for the false-positive cases.
+ */
+struct ConvertNCHWToNHWCPass final : public logo::Pass
+{
+public:
+ ConvertNCHWToNHWCPass(bool preserve_input, bool preserve_output)
+ : _preserve_input(preserve_input), _preserve_output(preserve_output)
+ {
+ // Do nothing
+ }
+
+ ConvertNCHWToNHWCPass() = delete;
+
+ virtual ~ConvertNCHWToNHWCPass() = default;
+
+ const char *name(void) const final { return "luci::ConvertNCHWToNHWCPass"; }
+
+ bool run(loco::Graph *g) final;
+
+private:
+ bool _preserve_input = false;
+ bool _preserve_output = false;
+};
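+
+// A minimal usage sketch (assumption: 'g' is a valid loco::Graph *):
+//
+//   luci::ConvertNCHWToNHWCPass pass(/*preserve_input=*/true, /*preserve_output=*/false);
+//   bool changed = pass.run(g); // true when any layout conversion was applied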
+
+} // namespace luci
+
+#endif // __LUCI_CONVERT_NCHW_TO_NHWC_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_ADD_V2_PASS_H__
+#define __LUCI_FOLD_ADD_V2_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold AddV2 to a constant tensor
+ *
+ */
+struct FoldAddV2Pass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldAddV2Pass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_ADD_V2_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_CAST_PASS_H__
+#define __LUCI_FOLD_CAST_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold Cast to a constant tensor
+ *
+ */
+struct FoldCastPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldCastPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_CAST_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_SPARSE_TO_DENSE_PASS_H__
+#define __LUCI_FOLD_SPARSE_TO_DENSE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold SparseToDense to a constant tensor
+ *
+ */
+struct FoldSparseToDensePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldSparseToDensePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_SPARSE_TO_DENSE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FORWARD_RESHAPE_TO_UNARYOP_PASS_H__
+#define __LUCI_FORWARD_RESHAPE_TO_UNARYOP_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to forward a Reshape across a following UnaryOp (the Reshape moves after it)
+ */
+struct ForwardReshapeToUnaryOpPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ForwardReshapeToUnaryOpPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FORWARD_RESHAPE_TO_UNARYOP_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_CONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_CONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Batch Normalization into CircleConv
+ */
+struct FuseBatchNormWithConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseBatchNormWithConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_CONV_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_DWCONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_DWCONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Batch Normalization into CircleDepthWiseConv2D
+ */
+struct FuseBatchNormWithDwConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseBatchNormWithDwConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_DWCONV_PASS_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
-#define __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
-
-#include <logo/Pass.h>
-
-namespace luci
-{
-
-/**
- * @brief Class to fuse Batch Normalization into CircleTransposeConv
- */
-struct FuseBatchNormWithTConvPass final : public logo::Pass
-{
- const char *name(void) const final { return "luci::FuseBatchNormWithTConvPass"; }
-
- bool run(loco::Graph *g) final;
-};
-
-} // namespace luci
-
-#endif // __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Batch Normalization into CircleTransposeConv
+ */
+struct FuseBatchNormWithTConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseBatchNormWithTConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__
-#define __LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__
-
-#include <loco.h>
-
-#include <luci/ModulePass.h>
-
-namespace luci
-{
-
-/**
- * @brief Pass to copy shape/dtype of loco to circle node
- *
- * CAUTION : This pass will be removed after refactoring is finished
- */
-class MigrateLegacyShapeDtypePass : public luci::Pass
-{
-public:
- virtual const char *name(void) const { return "luci::MigrateLegacyShapeDtypePass"; }
-
-public:
- bool run(luci::Module *m);
- bool run(loco::Graph *graph);
-};
-
-} // namespace luci
-
-#endif //__LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__
public:
QuantizeDequantizeWeightsPass(loco::DataType input_dtype, loco::DataType output_dtype,
QuantizationGranularity granularity)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
+ : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
{
// DO NOTHING
}
public:
QuantizeWithMinMaxPass(loco::DataType input_dtype, loco::DataType output_dtype,
QuantizationGranularity granularity)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
+ : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
{
// DO NOTHING
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_RESHAPE_PASS_H__
+#define __LUCI_REMOVE_REDUNDANT_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to collapse redundant Reshape nodes into a single Reshape node.
+ * @details This class replaces two consecutive Reshape nodes with a single Reshape node.
+ *          As a Reshape operation only changes the shape, not the buffer, the former Reshape is unnecessary.
+ */
+struct RemoveRedundantReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveRedundantReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_RESHAPE_PASS_H__
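A minimal sketch of how the redundancy described above can be detected: a Reshape whose producer is itself a Reshape. This is illustrative only; CircleReshape::tensor() is assumed to be the data-input accessor, and the replacement/rewiring logic of the real pass is not shown.

bool is_reshape_of_reshape(luci::CircleReshape *reshape)
{
  // The outer Reshape already fixes the final shape, so an inner Reshape that only
  // re-labels the same buffer contributes nothing and can be bypassed.
  return dynamic_cast<luci::CircleReshape *>(reshape->tensor()) != nullptr;
}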
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_RESHAPE_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary Reshape node whose input and output shapes are identical.
+ */
+struct RemoveUnnecessaryReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessaryReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_RESHAPE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_NO_EFFECT_SLICE_PASS_H__
+#define __LUCI_REMOVE_NO_EFFECT_SLICE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary Slice node whose input and output are identical.
+ */
+struct RemoveUnnecessarySlicePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessarySlicePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_NO_EFFECT_SLICE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_SPLIT_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_SPLIT_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary Split op
+ */
+struct RemoveUnnecessarySplitPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessarySplitPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_SPLIT_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_STRIDED_SLICE_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_STRIDED_SLICE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary StridedSlice node whose input and output are identical.
+ */
+struct RemoveUnnecessaryStridedSlicePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessaryStridedSlicePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_STRIDED_SLICE_PASS_H__
{
public:
RequantizePass(loco::DataType input_dtype, loco::DataType output_dtype)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}
+ : _input_dtype{input_dtype}, _output_dtype{output_dtype}
{
// DO NOTHING
}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_SHAPE_INFERENCE_PASS_H__
-#define __LUCI_SHAPE_INFERENCE_PASS_H__
-
-#include <loco.h>
-
-#include <luci/ModulePass.h>
-
-namespace luci
-{
-
-/**
- * @brief Pass to infer shape of nodes
- */
-class ShapeInferencePass : public luci::Pass
-{
-public:
- virtual const char *name(void) const { return "luci::ShapeInferencePass"; }
-
-public:
- bool run(luci::Module *m);
- bool run(loco::Graph *graph);
-};
-
-} // namespace luci
-
-#endif //__LUCI_SHAPE_INFERENCE_PASS_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__
-#define __LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__
-
-#include <loco.h>
-
-#include <luci/ModulePass.h>
-
-namespace luci
-{
-
-/**
- * @brief Pass to infer shape_signature of nodes
- */
-class ShapeSignatureInferencePass : public luci::Pass
-{
-public:
- virtual const char *name(void) const { return "luci::ShapeSignatureInferencePass"; }
-
-public:
- bool run(luci::Module *m);
- bool run(loco::Graph *graph);
-};
-
-} // namespace luci
-
-#endif //__LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__
SparsifyTensorPass(const std::string &tensor_name, const std::vector<int32_t> &traversal_order,
const std::vector<DimensionType> &format,
const std::vector<int32_t> &block_size, const std::vector<int32_t> &block_map)
- : _tensor_name{tensor_name}, _traversal_order{traversal_order}, _format{format},
- _block_size{block_size}, _block_map{block_map}
+ : _tensor_name{tensor_name}, _traversal_order{traversal_order}, _format{format},
+ _block_size{block_size}, _block_map{block_map}
{
// DO NOTHING
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_SQUEEZE_TO_RESHAPE_PASS_H__
+#define __LUCI_SUBSTITUTE_SQUEEZE_TO_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute a Squeeze node with a Reshape node under certain conditions.
+ */
+struct SubstituteSqueezeToReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstituteSqueezeToReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_SQUEEZE_TO_RESHAPE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_TRANSPOSE_TO_RESHAPE_PASS_H__
+#define __LUCI_SUBSTITUTE_TRANSPOSE_TO_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute a Transpose node with a single Reshape node when its input shape allows it.
+ */
+struct SubstituteTransposeToReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstituteTransposeToReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_TRANSPOSE_TO_RESHAPE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TRANSFORM_MIN_MAX_TO_RELU6_PASS_H__
+#define __LUCI_TRANSFORM_MIN_MAX_TO_RELU6_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to transform Maximum(Minimum(input, 6), 0) to Relu6
+ */
+struct TransformMinMaxToRelu6Pass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::TransformMinMaxToRelu6Pass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_TRANSFORM_MIN_MAX_TO_RELU6_PASS_H__
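A hedged sketch of the structural half of the Maximum(Minimum(input, 6), 0) pattern named in the @brief, in the same dynamic_cast matching style as the BatchNormPatternFinder further down this diff; CircleMaximum/CircleMinimum with x()/y() accessors are assumed, operand-order variations are ignored, and the checks that the constants actually hold 6 and 0 are deliberately left out.

bool looks_like_relu6(luci::CircleMaximum *max_node)
{
  // Maximum(Minimum(input, 6), 0): a Minimum on one input and a constant (0) on the other
  auto min_node = dynamic_cast<luci::CircleMinimum *>(max_node->x());
  auto zero = dynamic_cast<luci::CircleConst *>(max_node->y());
  if (min_node == nullptr || zero == nullptr)
    return false;
  // Minimum(input, 6): one input is a constant (6)
  auto six = dynamic_cast<luci::CircleConst *>(min_node->y());
  return six != nullptr; // value checks for 0 and 6 omitted in this sketch
}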
+++ /dev/null
-
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_TYPE_INFERENCE_PASS_H__
-#define __LUCI_TYPE_INFERENCE_PASS_H__
-
-#include <loco.h>
-
-#include <luci/ModulePass.h>
-
-namespace luci
-{
-
-/**
- * @brief Pass to infer type of nodes
- */
-class TypeInferencePass : public luci::Pass
-{
-public:
- virtual const char *name(void) const { return "luci::TypeInferencePass"; }
-
-public:
- bool run(luci::Module *m);
- bool run(loco::Graph *graph);
-};
-
-} // namespace luci
-
-#endif //__LUCI_TYPE_INFERENCE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchNormPatternFinder.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta)
+{
+ auto x = loco::must_cast<luci::CircleNode *>(add->x());
+ auto y = loco::must_cast<luci::CircleNode *>(add->y());
+
+ luci::CircleMul *pred = nullptr;
+ luci::CircleConst *constant = nullptr;
+
+ if (x->opcode() == luci::CircleOpcode::CIRCLECONST && y->opcode() == luci::CircleOpcode::MUL)
+ {
+ pred = loco::must_cast<luci::CircleMul *>(y);
+ constant = loco::must_cast<luci::CircleConst *>(x);
+ }
+ else if (x->opcode() == luci::CircleOpcode::MUL && y->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ pred = loco::must_cast<luci::CircleMul *>(x);
+ constant = loco::must_cast<luci::CircleConst *>(y);
+ }
+ else
+ {
+ return false;
+ }
+
+ if (constant->rank() != 1)
+ return false;
+
+ auto channel_dim = constant->dim(0);
+ // Assumption: Layout is channel-last
+ if (!(channel_dim == add->dim(add->rank() - 1)))
+ return false;
+
+ mul = pred;
+ beta = constant;
+ return true;
+}
+
+bool is_batchnorm_add(const luci::CircleAdd *add)
+{
+ // for dummy mul and beta
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ return is_batchnorm_add(add, mul, beta);
+}
+
+bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
+ luci::CircleConst *&gamma)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(mul->x());
+ auto y = dynamic_cast<luci::CircleConst *>(mul->y());
+
+ luci::CircleNode *pred = nullptr;
+ luci::CircleConst *constant = nullptr;
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred = loco::must_cast<luci::CircleNode *>(mul->y());
+ constant = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred = loco::must_cast<luci::CircleNode *>(mul->x());
+ constant = y;
+ }
+ else
+ {
+ return false;
+ }
+
+ if (constant->rank() != 1)
+ return false;
+
+ auto channel_dim = constant->dim(0);
+ // Assumption: Layout is channel-last
+ if (!(channel_dim == mul->dim(mul->rank() - 1)))
+ return false;
+
+ pred_node = pred;
+ gamma = constant;
+ return true;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_BATCH_NORM_PATTERN_FINDER_H__
+#define __LUCI_PASS_BATCH_NORM_PATTERN_FINDER_H__
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+/**
+ * @brief Find Mul-Add pattern and return Mul and beta as BatchNorm
+ */
+bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta);
+
+/**
+ * @brief Find Mul-Add pattern
+ */
+bool is_batchnorm_add(const luci::CircleAdd *add);
+
+/**
+ * @brief Find Const-Mul pattern and return Node and gamma as BatchNorm
+ */
+bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
+ luci::CircleConst *&gamma);
+
+} // namespace luci
+
+#endif // __LUCI_PASS_BATCH_NORM_PATTERN_FINDER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchNormPatternFinder.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace luci
+{
+namespace test
+{
+
+/**
+ * @brief Graphlet with Add and Const as beta from BatchNorm
+ */
+class AddBetaGraphlet
+{
+public:
+ AddBetaGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 shape, luci::FusedActFunc actf)
+ {
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _add_beta = g->nodes()->create<luci::CircleConst>();
+
+ _add->dtype(loco::DataType::FLOAT32);
+ _add_beta->dtype(loco::DataType::FLOAT32);
+
+ _add->fusedActivationFunction(actf);
+
+ assert(shape.size() > 0);
+ auto last_it = std::prev(shape.end(), 1);
+ auto channel_size = *last_it;
+
+ _add->shape(shape);
+ _add_beta->shape({channel_size});
+ _add_beta->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ _add_beta->at<loco::DataType::FLOAT32>(i) = i;
+
+ _add->name("add");
+ _add_beta->name("add_beta");
+ }
+
+public:
+ luci::CircleAdd *add() { return _add; }
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_add_beta = nullptr;
+};
+
+/**
+ * @brief Graphlet with Mul and Const as gamma from BatchNorm
+ */
+class MulGammaGraphlet
+{
+public:
+ MulGammaGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 shape, luci::FusedActFunc actf)
+ {
+ _mul = g->nodes()->create<luci::CircleMul>();
+ _mul_gamma = g->nodes()->create<luci::CircleConst>();
+
+ _mul->dtype(loco::DataType::FLOAT32);
+ _mul_gamma->dtype(loco::DataType::FLOAT32);
+
+ _mul->fusedActivationFunction(actf);
+
+ assert(shape.size() > 0);
+ auto last_it = std::prev(shape.end(), 1);
+ auto channel_size = *last_it;
+
+ _mul->shape(shape);
+ _mul_gamma->shape({channel_size});
+ _mul_gamma->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ _mul_gamma->at<loco::DataType::FLOAT32>(i) = i;
+
+ _mul->name("mul");
+ _mul_gamma->name("mul_gamma");
+ }
+
+public:
+ luci::CircleMul *mul(void) { return _mul; }
+
+protected:
+ luci::CircleMul *_mul = nullptr;
+ luci::CircleConst *_mul_gamma = nullptr;
+};
+
+/**
+ * @brief Graph of Mul-Add pattern from BatchNorm
+ */
+class MulAddGraph : public TestIOGraph, public AddBetaGraphlet, public MulGammaGraphlet
+{
+public:
+ MulAddGraph() = default;
+
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ MulGammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE);
+ AddBetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU);
+
+ // connect network
+ _mul->x(input());
+ _mul->y(_mul_gamma);
+ _add->x(_mul);
+ _add->y(_add_beta);
+ output()->from(_add);
+ }
+};
+
+/**
+ * @brief Graph of Add with Const
+ */
+class AddGraph : public TestIOGraph, public AddBetaGraphlet
+{
+public:
+ AddGraph() = default;
+
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ AddBetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU);
+
+ // connect network
+ _add->x(input());
+ _add->y(_add_beta);
+ output()->from(_add);
+ }
+};
+
+} // namespace test
+} // namespace luci
+
+class BatchNormPatternFinderMulAddTest : public ::testing::Test
+{
+public:
+ BatchNormPatternFinderMulAddTest() = default;
+
+protected:
+ luci::test::MulAddGraph _mag;
+};
+
+class BatchNormPatternFinderAddTest : public ::testing::Test
+{
+public:
+ BatchNormPatternFinderAddTest() = default;
+
+protected:
+ luci::test::AddGraph _ag;
+};
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add)
+{
+ _mag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ auto res = luci::is_batchnorm_add(_mag.add(), mul, beta);
+ ASSERT_TRUE(res);
+ ASSERT_NE(nullptr, mul);
+ ASSERT_NE(nullptr, beta);
+}
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add2)
+{
+ _mag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ auto res = luci::is_batchnorm_add(_mag.add());
+ ASSERT_TRUE(res);
+}
+
+TEST_F(BatchNormPatternFinderAddTest, is_batchnorm_add_NEG)
+{
+ _ag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ auto res = luci::is_batchnorm_add(_ag.add(), mul, beta);
+ ASSERT_FALSE(res);
+}
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_mul)
+{
+ _mag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleNode *pred = nullptr;
+ luci::CircleConst *gamma = nullptr;
+
+ auto res = luci::is_batchnorm_mul(_mag.mul(), pred, gamma);
+ ASSERT_TRUE(res);
+ ASSERT_NE(nullptr, pred);
+ ASSERT_NE(nullptr, gamma);
+}
#include "luci/CircleOptimizer.h"
+#include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "luci/Pass/FoldAddV2Pass.h"
+#include "luci/Pass/FoldCastPass.h"
#include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/FoldSparseToDensePass.h"
+#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
#include "luci/Pass/FuseActivationFunctionPass.h"
#include "luci/Pass/FuseAddWithTConvPass.h"
-#include "luci/Pass/FuseBatchNormWithTConv.h"
+#include "luci/Pass/FuseBatchNormWithConvPass.h"
+#include "luci/Pass/FuseBatchNormWithDwConvPass.h"
+#include "luci/Pass/FuseBatchNormWithTConvPass.h"
#include "luci/Pass/FuseBCQPass.h"
#include "luci/Pass/FuseInstanceNormPass.h"
#include "luci/Pass/FusePreActivationBatchNormPass.h"
#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
#include "luci/Pass/PropagateQuantParamPass.h"
+#include "luci/Pass/RemoveRedundantReshapePass.h"
#include "luci/Pass/RemoveRedundantTransposePass.h"
+#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+#include "luci/Pass/RemoveUnnecessarySlicePass.h"
+#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
+#include "luci/Pass/RemoveUnnecessarySplitPass.h"
#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
#include "luci/Pass/ResolveCustomOpAddPass.h"
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
#include "luci/Pass/SparsifyTensorPass.h"
#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
#include "luci/Pass/SubstitutePackToReshapePass.h"
+#include "luci/Pass/SubstituteSqueezeToReshapePass.h"
+#include "luci/Pass/SubstituteTransposeToReshapePass.h"
+#include "luci/Pass/TransformMinMaxToRelu6Pass.h"
// TODO add more passes
-#include "luci/Pass/ShapeInferencePass.h"
-#include "luci/Pass/ShapeSignatureInferencePass.h"
-#include "luci/Pass/TypeInferencePass.h"
-
-// Following passes will be removed after refactoring is finished
-#include "luci/Pass/MigrateLegacyShapeDtypePass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+#include "luci/Pass/CircleTypeInferencePass.h"
// logo passes
#include <logo/RemoveDeadNodeWithQueryPass.h>
#include "ModulePhase.h"
#include "ProgressReporter.h"
-#include "CircleOptimizerUtils.h"
+#include "helpers/Strings.h"
+
+#include "QuantizedModelVerifier.h"
#include <luci/IR/CircleNodes.h>
#include <logo/Phase.h>
namespace
{
-std::vector<int> parseIntFromCommadelimitedStr(std::string str)
-{
- std::vector<int> ret;
- std::istringstream is(str);
- for (uint32_t i; is >> i;)
- {
- assert(i != ',');
- ret.push_back(i);
- if (is.peek() == ',')
- is.ignore();
- }
- return ret;
-}
-
using namespace luci;
class OptimizeOptionsImpl final : public luci::CircleOptimizer::Options
{
luci::Phase phase;
- // Following passes will be deprecated after refactoring is finished.
- phase.emplace_back(std::make_unique<luci::MigrateLegacyShapeDtypePass>());
-
  // Following passes are needed every time other passes create new nodes or modify existing ones.
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::ShapeSignatureInferencePass>());
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
if (_options->query(Options::Algorithm::FuseBCQ))
{
/* TRANSFORM DECLARATION BEGIN */
phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
- // Following passes will be deprecated after refactoring is finished.
- phase.emplace_back(std::make_unique<luci::MigrateLegacyShapeDtypePass>());
-
  // Following passes are needed every time other passes create new nodes or modify existing ones.
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::ShapeSignatureInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
if (_options->query(Options::Algorithm::ResolveCustomOpAdd))
{
{
phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
}
+ if (_options->query(Options::Algorithm::FuseBatchNormWithConv))
+ {
+ phase.emplace_back(std::make_unique<FuseBatchNormWithConvPass>());
+ }
+ if (_options->query(Options::Algorithm::FuseBatchNormWithDwConv))
+ {
+ phase.emplace_back(std::make_unique<FuseBatchNormWithDwConvPass>());
+ }
if (_options->query(Options::Algorithm::FuseBatchNormWithTConv))
{
phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>());
{
phase.emplace_back(std::make_unique<FuseActivationFunctionPass>());
}
+ if (_options->query(Options::Algorithm::FoldAddV2))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldAddV2Pass>());
+ }
+ if (_options->query(Options::Algorithm::FoldCast))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldCastPass>());
+ }
if (_options->query(Options::Algorithm::FoldDequantize))
{
phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
}
+ if (_options->query(Options::Algorithm::FoldSparseToDense))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldSparseToDensePass>());
+ }
+ if (_options->query(Options::Algorithm::ForwardReshapeToUnaryOp))
+ {
+ phase.emplace_back(std::make_unique<luci::ForwardReshapeToUnaryOpPass>());
+ }
if (_options->query(Options::Algorithm::FusePreActivationBatchNorm))
{
phase.emplace_back(std::make_unique<luci::FusePreActivationBatchNormPass>());
{
phase.emplace_back(std::make_unique<luci::ShuffleWeightTo16x1Float32Pass>());
}
+ if (_options->query(Options::Algorithm::RemoveUnnecessaryReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveUnnecessarySlice))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessarySlicePass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveUnnecessaryStridedSlice))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryStridedSlicePass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveUnnecessarySplit))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessarySplitPass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveRedundantReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantReshapePass>());
+ }
if (_options->query(Options::Algorithm::RemoveRedundantTranspose))
{
phase.emplace_back(std::make_unique<luci::RemoveRedundantTransposePass>());
{
phase.emplace_back(std::make_unique<luci::SubstitutePackToReshapePass>());
}
+ if (_options->query(Options::Algorithm::SubstituteSqueezeToReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstituteSqueezeToReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::SubstituteTransposeToReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstituteTransposeToReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::TransformMinMaxToRelu6Pass))
+ {
+ phase.emplace_back(std::make_unique<luci::TransformMinMaxToRelu6Pass>());
+ }
+ if (_options->query(Options::Algorithm::ConvertNCHWToNHWC))
+ {
+ bool preserve_input =
+ _options->param(Options::AlgorithmParameters::NCHW_to_NHWC_preserve_input_shape) == "true";
+ bool preserve_output =
+ _options->param(Options::AlgorithmParameters::NCHW_to_NHWC_preserve_output_shape) == "true";
+
+ phase.emplace_back(
+ std::make_unique<luci::ConvertNCHWToNHWCPass>(preserve_input, preserve_output));
+ }
/* TRANSFORM DECLARATION END */
}
luci::QuantizeDequantizeWeightsPass fake_quantizer(
- str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
+ str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
fake_quantizer.run(g);
}
phase.emplace_back(std::make_unique<luci::PropagateQuantParamPass>());
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
phase_runner.attach(&prog);
phase_runner.run(phase);
+
+ // Verify the type/granularity of the quantized model
+ luci::QuantizedModelVerifier verifier(str_to_dtype(output_dtype),
+ str_to_granularity(granularity));
+ verifier.verify(g);
}
// Requantize
logo::Phase phase;
// Do Shape/Type inference
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
{
std::string tensor_name = _options->param(Options::AlgorithmParameters::Sparsify_tensor_name);
std::string str_tarversal_order =
- _options->param(Options::AlgorithmParameters::Sparsify_traversal_order);
+ _options->param(Options::AlgorithmParameters::Sparsify_traversal_order);
std::string str_format = _options->param(Options::AlgorithmParameters::Sparsify_format);
std::string str_block_size = _options->param(Options::AlgorithmParameters::Sparsify_block_size);
std::string str_block_map = _options->param(Options::AlgorithmParameters::Sparsify_block_map);
// traversal order
- std::vector<int32_t> traversal_order = parseIntFromCommadelimitedStr(str_tarversal_order);
+ std::vector<int32_t> traversal_order = csv_to_vector<int32_t>(str_tarversal_order);
// format
std::vector<DimensionType> format;
std::istringstream is(str_format);
is.ignore();
}
// block size
- std::vector<int32_t> block_size = parseIntFromCommadelimitedStr(str_block_size);
+ std::vector<int32_t> block_size = csv_to_vector<int32_t>(str_block_size);
// block map
- std::vector<int32_t> block_map = parseIntFromCommadelimitedStr(str_block_map);
+ std::vector<int32_t> block_map = csv_to_vector<int32_t>(str_block_map);
luci::SparsifyTensorPass sparsifier{tensor_name, traversal_order, format, block_size,
block_map};
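For reference on csv_to_vector<int32_t>, which replaces parseIntFromCommadelimitedStr above: the diff only shows the call sites, so the following is merely the removed helper rewritten as a template under the assumption that helpers/Strings.h provides something equivalent; the real implementation may differ.

#include <sstream>
#include <string>
#include <vector>

template <typename T> std::vector<T> csv_to_vector(const std::string &str)
{
  std::vector<T> ret;
  std::istringstream is(str);
  for (T i; is >> i;)
  {
    ret.push_back(i);
    if (is.peek() == ',') // skip the separator between values
      is.ignore();
  }
  return ret;
}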
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleOptimizer.h"
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+using Algorithms = luci::CircleOptimizer::Options::Algorithm;
+using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+
+TEST(CircleOptimizerTest, optimize_algorithms)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+  // NOTE these options are enabled just to exercise (cover) the corresponding optimizer code paths
+ // TODO add more if needed
+ options->enable(Algorithms::FoldAddV2);
+ options->enable(Algorithms::FoldCast);
+ options->enable(Algorithms::FoldDequantize);
+ options->enable(Algorithms::FoldSparseToDense);
+ options->enable(Algorithms::FusePreActivationBatchNorm);
+ options->enable(Algorithms::MakeBatchNormGammaPositive);
+ options->enable(Algorithms::ShuffleWeightTo16x1Float32);
+ options->enable(Algorithms::RemoveUnnecessaryReshape);
+ options->enable(Algorithms::RemoveUnnecessarySlice);
+ options->enable(Algorithms::RemoveUnnecessarySplit);
+ options->enable(Algorithms::ReplaceMulAddWithDepthwiseConv);
+ options->enable(Algorithms::SubstituteTransposeToReshape);
+ options->enable(Algorithms::ConvertNCHWToNHWC);
+
+ o.optimize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, sparsify_simple)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::SparsifyTensorPass);
+ options->param(AlgorithmParameters::Sparsify_tensor_name, "dummy");
+ options->param(AlgorithmParameters::Sparsify_traversal_order, "dummy");
+ options->param(AlgorithmParameters::Sparsify_format, "ds");
+ options->param(AlgorithmParameters::Sparsify_block_size, "1,1");
+ options->param(AlgorithmParameters::Sparsify_block_map, "1,1");
+
+ o.sparsify(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, quantize_quantdequant_simple)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, quantize_quantdequant_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_quantdequant_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_quantdequant_gran_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_minmax_simple)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, quantize_minmax_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_minmax_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_minmax_gran_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_requant_simple)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "int8");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, quantize_requant_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleOptimizerTest, quantize_requant_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_dtype, "int8");
+ options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
#include "CircleOptimizerUtils.h"
-namespace luci
-{
-
-bool in_array(const std::string &str, const std::vector<std::string> &array)
-{
- return std::find(array.begin(), array.end(), str) != array.end();
-}
+#include <luci/IR/CircleNode.h>
-std::string to_string(const std::vector<std::string> &strings)
-{
- assert(!strings.empty());
-
- std::string res;
- for (unsigned int i = 0; i < strings.size() - 1; i++)
- res += strings[i] + ", ";
-
- res += strings[strings.size() - 1];
- return res;
-}
-
-std::string to_lower_case(std::string s)
-{
- std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
- return s;
-}
-
-loco::DataType str_to_dtype(const std::string &str)
+namespace luci
{
- if (to_lower_case(str).compare("uint8") == 0)
- return loco::DataType::U8;
- if (to_lower_case(str).compare("uint16") == 0)
- return loco::DataType::U16;
- if (to_lower_case(str).compare("uint32") == 0)
- return loco::DataType::U32;
- if (to_lower_case(str).compare("uint64") == 0)
- return loco::DataType::U64;
-
- if (to_lower_case(str).compare("int8") == 0)
- return loco::DataType::S8;
- if (to_lower_case(str).compare("int16") == 0)
- return loco::DataType::S16;
- if (to_lower_case(str).compare("int32") == 0)
- return loco::DataType::S32;
- if (to_lower_case(str).compare("int64") == 0)
- return loco::DataType::S64;
-
- if (to_lower_case(str).compare("float16") == 0)
- return loco::DataType::FLOAT16;
- if (to_lower_case(str).compare("float32") == 0)
- return loco::DataType::FLOAT32;
- if (to_lower_case(str).compare("float64") == 0)
- return loco::DataType::FLOAT64;
- if (to_lower_case(str).compare("bool") == 0)
- return loco::DataType::BOOL;
-
- return loco::DataType::Unknown;
-}
-
-QuantizationGranularity str_to_granularity(const std::string &str)
+bool has_dynamic_shape(const loco::Node *node)
{
- if (to_lower_case(str).compare("layer") == 0)
- return QuantizationGranularity::LayerWise;
-
- if (to_lower_case(str).compare("channel") == 0)
- return QuantizationGranularity::ChannelWise;
-
- throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
+ const auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ for (uint32_t i = 0; i < circle_node->rank(); ++i)
+ if (!circle_node->dim(i).known())
+ return true;
+ return false;
}
} // namespace luci
#ifndef __LUCI_CIRCLE_OPTIMIZER_UTILS_H__
#define __LUCI_CIRCLE_OPTIMIZER_UTILS_H__
-#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
-#include "luci/Pass/QuantizeWithMinMaxPass.h"
-
#include <loco.h>
-#include <algorithm>
-
namespace luci
{
-bool in_array(const std::string &, const std::vector<std::string> &);
-
-std::string to_string(const std::vector<std::string> &);
-
-std::string to_lower_case(std::string);
-
-loco::DataType str_to_dtype(const std::string &);
-
-QuantizationGranularity str_to_granularity(const std::string &);
+bool has_dynamic_shape(const loco::Node *node);
} // namespace luci
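A hypothetical call site for the has_dynamic_shape helper declared above, only to show its intended use; safe_to_rewrite and its surrounding context are invented for illustration and assume the declarations in this header.

// Illustrative only: a pass might skip nodes whose shape is not fully known yet.
bool safe_to_rewrite(const luci::CircleNode *node)
{
  if (luci::has_dynamic_shape(node))
    return false; // some dimension is still unknown; defer until shape inference resolves it
  return true;
}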
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "helpers/InferenceCandidates.h"
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco.h>
+
+namespace
+{
+
+bool is_same_shape(luci::CircleNode *node, loco::TensorShape shape)
+{
+ if (node->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+
+ if (node->rank() != shape.rank())
+ return false;
+
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ if (node->dim(i).known() != shape.dim(i).known())
+ return false;
+
+ if (node->dim(i).value() != shape.dim(i).value())
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool CircleShapeInferencePass::run(luci::Module *m)
+{
+ bool changed = false;
+
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ if (run(m->graph(g)))
+ changed = true;
+ }
+
+ return changed;
+}
+
+bool CircleShapeInferencePass::run(loco::Graph *g)
+{
+ luci::sinf::Rule shape_infer_rule;
+ bool changed = false;
+
+ for (auto node : inference_candidates(g))
+ {
+ loco::TensorShape shape;
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+
+ if (shape_infer_rule.infer(circle_node, shape) && !is_same_shape(circle_node, shape))
+ {
+ circle_node->rank(shape.rank());
+ for (uint32_t i = 0; i < shape.rank(); ++i)
+ circle_node->dim(i) = shape.dim(i);
+
+ circle_node->shape_status(luci::ShapeStatus::VALID);
+
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <loco.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+TEST(CircleShapeInferencePassTest, name)
+{
+ luci::CircleShapeInferencePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * This test is to check whether shape inference is done by topological order.
+ *
+ * When perm() of "transpose1" is changed from "old_perm" to "new_perm"
+ * by some of luci/Pass like below diagram, shape_status of "transpose1" is
+ * still VALID even the shape should be changed.
+ * If "transpose2" is visited first before shape of "transpose1" is updated,
+ * "transpose2" can reference the shape of "relu" which is not updated yet.
+ * Then shape of "transpose2" becomes 3x5x5x1 and it causes an error at "conv2d".
+ *
+ * <Initial graph>
+ * 4x1x1x3
+ * [old_perm] ----------+ [filter] ----------+
+ * (0,2,1,3) | |
+ * | [bias] ----------+
+ * | |
+ * input ------> [transpose1] ------> [relu] ------> [conv2d] ------> output
+ * 1x5x5x3 1x5x5x3 1x5x5x3 1x5x5x4
+ *
+ *
+ * <Right after transformation>
+ * 4x1x1x3
+ * [new_perm] ----------+-----------------------------------+ [filter] ------+
+ * (3,2,1,0) | | |
+ * | | [bias] ------+
+ * | | |
+ * input ------> [transpose1] ------> [relu] ------> [transpose2] ------> [conv2d] ------> output
+ * 1x5x5x3 1x5x5x3 1x5x5x3 ? 1x5x5x4
+ *
+ *
+ * <Expected result>
+ * 4x1x1x3
+ * [new_perm] ----------+-----------------------------------+ [filter] ------+
+ * (3,2,1,0) | | |
+ * | | [bias] ------+
+ * | | |
+ * input ------> [transpose1] ------> [relu] ------> [transpose2] ------> [conv2d] ------> output
+ * 1x5x5x3 3x5x5x1 3x5x5x1 1x5x5x3 1x5x5x4
+ *
+ */
+TEST(CircleShapeInferencePassTest, original_node_change)
+{
+ luci::CircleShapeInferencePass pass;
+ auto g = loco::make_graph();
+
+ // Have to be packed into lambda to check throw
+ auto shape_inference_run = [&]() {
+ while (pass.run(g.get()) == true)
+ ;
+ };
+
+ // Create nodes to make relu traversed first
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto relu = g->nodes()->create<luci::CircleRelu>();
+ auto old_perm = g->nodes()->create<luci::CircleConst>();
+ auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
+ auto filter = g->nodes()->create<luci::CircleConst>();
+ auto bias = g->nodes()->create<luci::CircleConst>();
+ auto conv2d = g->nodes()->create<luci::CircleConv2D>();
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ auto new_perm = g->nodes()->create<luci::CircleConst>();
+ auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
+
+ // Build up initial graph
+ auto graph_input = g->inputs()->create();
+ graph_input->shape({1, 5, 5, 3});
+
+ input->index(graph_input->index());
+ input->shape({1, 5, 5, 3});
+ input->shape_status(luci::ShapeStatus::VALID);
+
+ old_perm->dtype(loco::DataType::S32);
+ old_perm->size<loco::DataType::S32>(4);
+ old_perm->shape({4});
+ old_perm->at<loco::DataType::S32>(0) = 0;
+ old_perm->at<loco::DataType::S32>(1) = 2;
+ old_perm->at<loco::DataType::S32>(2) = 1;
+ old_perm->at<loco::DataType::S32>(3) = 3;
+ old_perm->shape_status(luci::ShapeStatus::VALID);
+
+ transpose1->a(input);
+ transpose1->perm(old_perm);
+
+ relu->features(transpose1);
+
+ filter->dtype(loco::DataType::FLOAT32);
+ filter->size<loco::DataType::FLOAT32>(4 * 1 * 1 * 3);
+ filter->shape({4, 1, 1, 3});
+ filter->shape_status(luci::ShapeStatus::VALID);
+
+ bias->dtype(loco::DataType::FLOAT32);
+ bias->size<loco::DataType::FLOAT32>(4);
+ bias->shape({4});
+ bias->shape_status(luci::ShapeStatus::VALID);
+
+ conv2d->input(relu);
+ conv2d->filter(filter);
+ conv2d->bias(bias);
+ conv2d->padding(luci::Padding::VALID);
+ conv2d->stride()->h(1);
+ conv2d->stride()->w(1);
+ conv2d->dilation()->h(1);
+ conv2d->dilation()->w(1);
+
+ output->from(conv2d);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ graph_output->shape({1, 5, 5, 4});
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Transform graph
+ new_perm->dtype(loco::DataType::S32);
+ new_perm->size<loco::DataType::S32>(4);
+ new_perm->shape({4});
+ new_perm->at<loco::DataType::S32>(0) = 3;
+ new_perm->at<loco::DataType::S32>(1) = 2;
+ new_perm->at<loco::DataType::S32>(2) = 1;
+ new_perm->at<loco::DataType::S32>(3) = 0;
+ new_perm->shape_status(luci::ShapeStatus::VALID);
+
+ transpose1->perm(new_perm);
+
+ transpose2->a(relu);
+ transpose2->perm(new_perm);
+
+ conv2d->input(transpose2);
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Check result of shape inference is correct
+ ASSERT_EQ(3, transpose1->dim(0).value());
+ ASSERT_EQ(5, transpose1->dim(1).value());
+ ASSERT_EQ(5, transpose1->dim(2).value());
+ ASSERT_EQ(1, transpose1->dim(3).value());
+
+ ASSERT_EQ(3, relu->dim(0).value());
+ ASSERT_EQ(5, relu->dim(1).value());
+ ASSERT_EQ(5, relu->dim(2).value());
+ ASSERT_EQ(1, relu->dim(3).value());
+
+ ASSERT_EQ(1, transpose2->dim(0).value());
+ ASSERT_EQ(5, transpose2->dim(1).value());
+ ASSERT_EQ(5, transpose2->dim(2).value());
+ ASSERT_EQ(3, transpose2->dim(3).value());
+
+ ASSERT_EQ(1, conv2d->dim(0).value());
+ ASSERT_EQ(5, conv2d->dim(1).value());
+ ASSERT_EQ(5, conv2d->dim(2).value());
+ ASSERT_EQ(4, conv2d->dim(3).value());
+
+ SUCCEED();
+}
+
+/**
+ * This test checks the case where an imported shape is wrong.
+ *
+ * Even though "concat1" has a wrong shape at first, the correct shape should be inferred.
+ *
+ * <Initial graph>
+ *
+ * 1x1x1x1
+ * input1 ------+ 8x7x6x5
+ * +-----> [concat1] ------+
+ * input2 ------+ (axis=3) | 1x1x2x3
+ * 1x1x1x2 +------> [concat2] ------> output
+ * | (axis=2)
+ * 1x1x1x3 |
+ * input3 ------------------------------+
+ *
+ *
+ * <Expected result>
+ *
+ * 1x1x1x1
+ * input1 ------+ 1x1x1x3
+ * +-----> [concat1] ------+
+ * input2 ------+ (axis=3) | 1x1x2x3
+ * 1x1x1x2 +------> [concat2] ------> output
+ * | (axis=2)
+ * 1x1x1x3 |
+ * input3 ------------------------------+
+ */
+TEST(CircleShapeInferencePassTest, wrong_imported_shape)
+{
+ luci::CircleShapeInferencePass pass;
+ auto g = loco::make_graph();
+
+  // Wrapped in a lambda so we can check that shape inference does not throw
+ auto shape_inference_run = [&]() {
+ while (pass.run(g.get()) == true)
+ ;
+ };
+
+  // Create nodes in this order so that concat2 is traversed first
+ auto concat2 = g->nodes()->create<luci::CircleConcatenation>(2);
+ auto concat1 = g->nodes()->create<luci::CircleConcatenation>(2);
+ auto input1 = g->nodes()->create<luci::CircleInput>();
+ auto input2 = g->nodes()->create<luci::CircleInput>();
+ auto input3 = g->nodes()->create<luci::CircleInput>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ auto graph_input2 = g->inputs()->create();
+ auto graph_input3 = g->inputs()->create();
+ graph_input1->shape({1, 1, 1, 1});
+ graph_input2->shape({1, 1, 1, 2});
+  graph_input3->shape({1, 1, 1, 3});
+
+ input1->index(graph_input1->index());
+ input1->shape({1, 1, 1, 1});
+ input1->shape_status(luci::ShapeStatus::VALID);
+
+ input2->index(graph_input2->index());
+ input2->shape({1, 1, 1, 2});
+ input2->shape_status(luci::ShapeStatus::VALID);
+
+ input3->index(graph_input3->index());
+ input3->shape({1, 1, 1, 3});
+ input3->shape_status(luci::ShapeStatus::VALID);
+
+ concat1->values(0, input1);
+ concat1->values(1, input2);
+ concat1->axis(3);
+ concat1->shape({8, 7, 6, 5}); // Intentionally set wrong shape
+ concat1->shape_status(luci::ShapeStatus::VALID);
+
+ concat2->values(0, concat1);
+ concat2->values(1, input3);
+ concat2->axis(2);
+
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(concat2);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ graph_output->shape({1, 1, 2, 3});
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Check result of shape inference is correct
+ ASSERT_EQ(1, concat1->dim(0).value());
+ ASSERT_EQ(1, concat1->dim(1).value());
+ ASSERT_EQ(1, concat1->dim(2).value());
+ ASSERT_EQ(3, concat1->dim(3).value());
+
+ ASSERT_EQ(1, concat2->dim(0).value());
+ ASSERT_EQ(1, concat2->dim(1).value());
+ ASSERT_EQ(2, concat2->dim(2).value());
+ ASSERT_EQ(3, concat2->dim(3).value());
+
+ SUCCEED();
+}
+
+/**
+ * This test checks that the shapes of virtual operations are inferred even when they
+ * are not used for the graph output, since those shapes still need to be exported.
+ *
+ * Although "split_out2" is not used for the graph output, its shape should be inferred.
+ *
+ * <Initial graph>
+ *
+ *
+ * 1x6 +----> [split_out1] ----> output
+ * input ------> [split] -----+
+ * (split_dim=1) +----> [split_out2]
+ * (num_split=2)
+ *
+ *
+ * <Expected result>
+ * 1x3 1x3
+ * 1x6 +----> [split_out1] ----> output
+ * input ------> [split] -----+
+ * (split_dim=1) +----> [split_out2]
+ * (num_split=2) 1x3
+ */
+TEST(CircleShapeInferencePassTest, not_used_virtual_op)
+{
+ luci::CircleShapeInferencePass pass;
+ auto g = loco::make_graph();
+
+  // Wrapped in a lambda so we can check that shape inference does not throw
+ auto shape_inference_run = [&]() {
+ while (pass.run(g.get()) == true)
+ ;
+ };
+
+ // Create nodes
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto split = g->nodes()->create<luci::CircleSplit>();
+ auto split_out1 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_out2 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ graph_input1->shape({1, 6});
+
+ input->index(graph_input1->index());
+ input->shape({1, 6});
+ input->shape_status(luci::ShapeStatus::VALID);
+
+ split_dim->dtype(loco::DataType::S32);
+ split_dim->size<loco::DataType::S32>(1);
+ split_dim->shape({1});
+ split_dim->at<loco::DataType::S32>(0) = 1;
+ split_dim->shape_status(luci::ShapeStatus::VALID);
+
+ split->split_dim(split_dim);
+ split->input(input);
+ split->num_split(2);
+
+ split_out1->input(split);
+ split_out1->index(0);
+
+ split_out2->input(split);
+ split_out2->index(1);
+
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(split_out1);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ graph_output->shape({1, 3});
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Check result of shape inference is correct
+ ASSERT_EQ(1, split_out1->dim(0).value());
+ ASSERT_EQ(3, split_out1->dim(1).value());
+
+ ASSERT_EQ(1, split_out2->dim(0).value());
+ ASSERT_EQ(3, split_out2->dim(1).value());
+
+ SUCCEED();
+}
* limitations under the License.
*/
+#include "helpers/InferenceCandidates.h"
+
#include "luci/Pass/CircleTypeInferencePass.h"
#include <luci/Service/CircleTypeInference.h>
luci::tinf::Rule type_infer_rule;
bool changed = false;
- for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ for (auto node : inference_candidates(g))
{
loco::DataType dtype;
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/CircleTypeInferencePass.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleTypeInferencePassTest, name)
+{
+ luci::CircleTypeInferencePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "CircleOptimizerUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Log.h>
+
+namespace
+{
+
+enum class DataFormat
+{
+ NCHW,
+ NHWC
+};
+
+/**
+ * @brief Set annotation for DataFormat (NCHW, NHWC)
+ *
+ * @note DataFormatAnnotation will live longer than this Pass (until the
+ * annotated loco::Node is erased). So, do not use large data in the
+ * annotation to avoid excessive memory usage.
+ */
+class DataFormatAnnotation final : public loco::NodeAnnotation
+{
+public:
+ DataFormatAnnotation(const DataFormat &format) : _format{format}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const DataFormat &format(void) const { return _format; }
+
+private:
+ DataFormat _format;
+};
+
+void set_data_format(loco::Node *node, const DataFormat &format)
+{
+ node->annot(std::make_unique<DataFormatAnnotation>(format));
+}
+
+DataFormat get_data_format(loco::Node *node)
+{
+ assert(node->annot<DataFormatAnnotation>() != nullptr);
+ return node->annot<DataFormatAnnotation>()->format();
+}
+
+bool has_data_format(loco::Node *node) { return node->annot<DataFormatAnnotation>() != nullptr; }
+
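+// Creates a Transpose node whose perm is a constant S32 vector built from `indices`;
+// the caller is responsible for connecting the transpose input (trans->a()).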
+luci::CircleTranspose *create_4d_transpose(luci::CircleNode *node,
+ const std::vector<int32_t> indices)
+{
+ assert(indices.size() == 4);
+
+ auto name = node->name();
+ assert(name.length() > 0);
+
+ auto perm = node->graph()->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(4);
+ perm->rank(1);
+ perm->dim(0) = 4;
+ for (uint32_t i = 0; i < 4; i++)
+ perm->at<loco::DataType::S32>(i) = indices[i];
+ perm->shape_status(luci::ShapeStatus::VALID);
+
+ auto make_string = [](const std::vector<int32_t> &nums) {
+ std::string str;
+ for (auto num : nums)
+ {
+ if (str.length() > 0)
+ str += ".";
+ str += std::to_string(num);
+ }
+ return str;
+ };
+
+ auto str_indices = make_string(indices);
+
+ perm->name(name + "/Transpose_" + str_indices + "/perm");
+
+ auto trans = node->graph()->nodes()->create<luci::CircleTranspose>();
+ trans->perm(perm);
+ trans->name(name + "/Transpose_" + str_indices);
+ luci::add_origin(trans, luci::get_origin(node));
+
+ return trans;
+}
+
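+// Maps an NCHW axis to the corresponding NHWC axis (e.g. axis 1, the channel axis,
+// maps to 3). Negative axes are normalized into [0, 4) first.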
+int32_t nchw_axis_to_nhwc(int32_t axis)
+{
+ uint32_t pos_axis = axis >= 0 ? static_cast<uint32_t>(axis) : static_cast<uint32_t>(axis + 4);
+ static const uint32_t to_nhwc[4] = {0, 3, 1, 2};
+ if (pos_axis > 3)
+ throw std::runtime_error("Concat axis must be in range [-4, 4)");
+ return to_nhwc[pos_axis];
+}
+
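+// The "post" transpose (perm 0,3,1,2) converts a converted node's NHWC output back to
+// NCHW for its original consumers.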
+luci::CircleTranspose *create_post_transpose(luci::CircleNode *node)
+{
+ return create_4d_transpose(node, {0, 3, 1, 2});
+}
+
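+// The "pre" transpose (perm 0,2,3,1) converts an NCHW input into NHWC before the
+// converted operator.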
+luci::CircleTranspose *create_pre_transpose(luci::CircleNode *node)
+{
+ return create_4d_transpose(node, {0, 2, 3, 1});
+}
+
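+// Returns the flat row-major offset of a 4-D index within the given tensor shape.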
+uint32_t cal_offset(const loco::TensorShape &dimension, const uint32_t *indices)
+{
+ return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
+ dimension.dim(3).value() +
+ indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
+ indices[2] * dimension.dim(3).value() + indices[3];
+}
+
+luci::CircleConst *create_NHWC_paddings(luci::CircleConst *paddings)
+{
+ // paddings shape is (4,2) (it was checked by is_NCHW)
+ assert(paddings != nullptr);
+ assert(paddings->rank() == 2);
+ assert(paddings->dim(0).value() == 4);
+ assert(paddings->dim(1).value() == 2);
+
+ // paddings for idx 0~3 are 0 (checked by is_NCHW)
+ assert(paddings->at<loco::DataType::S32>(0) == 0);
+ assert(paddings->at<loco::DataType::S32>(1) == 0);
+ assert(paddings->at<loco::DataType::S32>(2) == 0);
+ assert(paddings->at<loco::DataType::S32>(3) == 0);
+
+ auto name = paddings->name();
+ assert(name.length() > 0);
+
+ auto nhwc_paddings = paddings->graph()->nodes()->create<luci::CircleConst>();
+ nhwc_paddings->dtype(loco::DataType::S32);
+ nhwc_paddings->shape({4, 2});
+ nhwc_paddings->shape_status(luci::ShapeStatus::VALID);
+ nhwc_paddings->size<loco::DataType::S32>(4 * 2);
+ nhwc_paddings->name(name + "_NHWC");
+
+ for (uint32_t dim = 0; dim < 4; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ int32_t data = 0;
+
+ if (dim == 1)
+ {
+ // get third dimension (H in NCHW)
+ data = paddings->at<loco::DataType::S32>(2 * 2 + i);
+ }
+ else if (dim == 2)
+ {
+ // get fourth dimension (W in NCHW)
+ data = paddings->at<loco::DataType::S32>(3 * 2 + i);
+ }
+
+ nhwc_paddings->at<loco::DataType::S32>(dim * 2 + i) = data;
+ }
+ }
+ return nhwc_paddings;
+}
+
+luci::CircleConst *create_NHWC_from_NCHW(luci::CircleConst *constant)
+{
+ LOGGER(l);
+ assert(constant->rank() == 4);
+
+ // TODO: Support non-float types
+ if (constant->dtype() != loco::DataType::FLOAT32)
+ {
+ INFO(l) << "Non-float type constant: " << constant->name() << std::endl;
+ return nullptr;
+ }
+
+ loco::TensorShape nchw_dimension{constant->dim(0), constant->dim(1), constant->dim(2),
+ constant->dim(3)};
+ loco::TensorShape nhwc_dimension{constant->dim(0), constant->dim(2), constant->dim(3),
+ constant->dim(1)};
+
+ auto name = constant->name();
+ assert(name.length() > 0);
+
+ auto nhwc_const = constant->graph()->nodes()->create<luci::CircleConst>();
+ nhwc_const->dtype(constant->dtype());
+ nhwc_const->rank(4);
+ nhwc_const->dim(0).set(constant->dim(0).value());
+ nhwc_const->dim(1).set(constant->dim(2).value());
+ nhwc_const->dim(2).set(constant->dim(3).value());
+ nhwc_const->dim(3).set(constant->dim(1).value());
+ nhwc_const->shape_status(luci::ShapeStatus::VALID);
+ nhwc_const->size<loco::DataType::FLOAT32>(constant->size<loco::DataType::FLOAT32>());
+ nhwc_const->name(name + "_NHWC");
+
+ for (uint32_t n = 0; n < nchw_dimension.dim(0).value(); n++)
+ {
+ for (uint32_t c = 0; c < nchw_dimension.dim(1).value(); c++)
+ {
+ for (uint32_t h = 0; h < nchw_dimension.dim(2).value(); h++)
+ {
+ for (uint32_t w = 0; w < nchw_dimension.dim(3).value(); w++)
+ {
+ uint32_t nchw_indices[4] = {n, c, h, w};
+ uint32_t nhwc_indices[4] = {n, h, w, c};
+ auto data =
+ constant->at<loco::DataType::FLOAT32>(cal_offset(nchw_dimension, nchw_indices));
+ nhwc_const->at<loco::DataType::FLOAT32>(cal_offset(nhwc_dimension, nhwc_indices)) = data;
+ }
+ }
+ }
+ }
+ return nhwc_const;
+}
+
+// NOTE Following conditions can be extended later
+//
+// Find PAD with an NCHW pattern described below
+// - Paddings shape : [4, 2]
+// - Paddings value : [[0, 0], [0, 0], [h_t, h_b], [w_t, w_b]]]
+bool is_NCHW(const luci::CirclePad *node)
+{
+ const auto paddings = dynamic_cast<luci::CircleConst *>(node->paddings());
+ // Non-const paddings is not supported
+ if (paddings == nullptr)
+ return false;
+
+ if (paddings->rank() != 2)
+ return false;
+
+ if (paddings->dim(0).value() != 4 || paddings->dim(1).value() != 2)
+ return false;
+
+ // Only check the first two dimensions
+ for (uint32_t dim = 0; dim < 2; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ auto data = paddings->at<loco::DataType::S32>(dim * 2 + i);
+ if (data != 0)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// NOTE Following conditions can be extended later
+//
+// Find MUL with an NCHW pattern described below
+// - Input (non-constant) shape : [N, C, H, W]
+// - Input (constant) shape : [1, C, 1, 1]
+// - Output shape : [N, C, H, W]
+bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_node,
+ luci::CircleConst *&multiplier)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(node->x());
+ auto y = dynamic_cast<luci::CircleConst *>(node->y());
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->y());
+ multiplier = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->x());
+ multiplier = y;
+ }
+ else
+ {
+    // Ignore unless exactly one of the inputs is a constant (the multiplier).
+ return false;
+ }
+
+ if (pred_node->rank() != 4)
+ return false;
+
+ const auto const_rank = multiplier->rank();
+ if (const_rank != 4)
+ return false;
+
+ for (uint32_t i = 0; i < const_rank; i++)
+ {
+ if (i != 1 && multiplier->dim(i).value() != 1)
+ return false;
+ }
+
+ const auto const_cdim = multiplier->dim(1);
+ const auto input_cdim = pred_node->dim(1);
+ const auto output_cdim = node->dim(1);
+
+ if (const_cdim == input_cdim && input_cdim == output_cdim)
+ return true;
+ else
+ return false;
+}
+
+// We assume ADD with const input is NCHW if,
+// Input shape: (N, C, H, W)
+// Output shape: (N, C, H, W)
+// 1. Const shape is (1, C, 1, 1)
+// 2. Input, Output, Const have the same C.
+bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_node,
+ luci::CircleConst *&beta)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(node->x());
+ auto y = dynamic_cast<luci::CircleConst *>(node->y());
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->y());
+ beta = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->x());
+ beta = y;
+ }
+ else
+ {
+    // Ignore unless exactly one of the inputs is a constant.
+ return false;
+ }
+
+ if (pred_node->rank() != 4)
+ return false;
+
+ const auto const_rank = beta->rank();
+ if (const_rank != 4)
+ return false;
+
+ // Check the shape is (1, C, 1, 1)
+ for (uint32_t i = 0; i < const_rank; i++)
+ {
+ if (i == 1)
+ continue;
+
+ if (beta->dim(i).value() != 1)
+ return false;
+ }
+
+ const auto const_cdim = beta->dim(1);
+ const auto input_cdim = pred_node->dim(1);
+ const auto output_cdim = node->dim(1);
+
+ // Check Input, Output, Const have the same channel size
+ if (const_cdim == input_cdim && input_cdim == output_cdim)
+ return true;
+ else
+ return false;
+}
+
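+// Converts a feature-wise unary op (Relu, Relu6, LeakyRelu, ...) by inserting a
+// pre-transpose on its input and a post-transpose in front of its users.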
+template <class T> bool convert_unary_features(T *node)
+{
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->features());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->features(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+}
+
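+// Visitor that rewrites a single NCHW operator into its NHWC form; returns true on success.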
+class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
+{
+ // Default
+ bool visit(luci::CircleNode *node)
+ {
+ throw std::runtime_error(node->name() + " is an unsupported operator.");
+ }
+
+ bool visit(luci::CircleInput *node)
+ {
+ const auto n = node->dim(0);
+ const auto c = node->dim(1);
+ const auto h = node->dim(2);
+ const auto w = node->dim(3);
+
+ node->dim(1) = h;
+ node->dim(2) = w;
+ node->dim(3) = c;
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+    // Insert post-transpose
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ // Update graph input
+ auto graph_inputs = node->graph()->inputs();
+ auto graph_input = graph_inputs->at(node->index());
+ graph_input->shape({n, h, w, c});
+
+ return true;
+ }
+
+ bool visit(luci::CircleOutput *node)
+ {
+ // Insert pre-transpose
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(node->from());
+
+ node->from(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ // Update graph output
+ const auto n = node->dim(0).value();
+ const auto c = node->dim(1).value();
+ const auto h = node->dim(2).value();
+ const auto w = node->dim(3).value();
+
+ auto graph_outputs = node->graph()->outputs();
+ auto graph_output = graph_outputs->at(node->index());
+ graph_output->shape({n, h, w, c});
+
+ return true;
+ }
+
+ bool visit(luci::CircleAdd *node)
+ {
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ if (is_NCHW_with_const(node, pred_node, beta))
+ {
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+
+ auto nhwc_const = create_NHWC_from_NCHW(beta);
+ if (nhwc_const == nullptr)
+ return false;
+
+ node->x(pre_trans);
+ node->y(nhwc_const);
+ }
+ else if (beta == nullptr)
+ {
+ // Both inputs are not constant.
+ // In this case, we cannot distinguish NCHW from NHWC,
+ // so just insert Transpose Ops.
+ auto pre_trans_x = create_pre_transpose(node);
+ pre_trans_x->a(node->x());
+ node->x(pre_trans_x);
+
+ auto pre_trans_y = create_pre_transpose(node);
+ pre_trans_y->a(node->y());
+ node->y(pre_trans_y);
+ }
+ else
+ {
+ return false;
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+ return true;
+ }
+
+ bool visit(luci::CircleConcatenation *node)
+ {
+ const auto num_values = node->numValues();
+ for (uint32_t i = 0; i < num_values; i++)
+ {
+ auto pred_node = loco::must_cast<luci::CircleNode *>(node->values(i));
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->values(i, pre_trans);
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ node->axis(nchw_axis_to_nhwc(node->axis()));
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ bool visit(luci::CircleLeakyRelu *node)
+ {
+ return convert_unary_features<luci::CircleLeakyRelu>(node);
+ }
+
+ bool visit(luci::CircleMul *node)
+ {
+ LOGGER(l);
+
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleConst *multiplier = nullptr;
+
+ if (is_NCHW_with_const(node, pred_node, multiplier))
+ {
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->x(pre_trans);
+
+ auto nhwc_const = create_NHWC_from_NCHW(multiplier);
+ node->y(nhwc_const);
+ }
+ else if (multiplier == nullptr)
+ {
+      // TODO: Implement this case.
+ INFO(l) << "Not yet implemented. Both inputs of MUL are non-const." << std::endl;
+ return false;
+ }
+ else
+ {
+ return false;
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+ return true;
+ }
+
+ bool visit(luci::CircleNeg *node)
+ {
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->x());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->x(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ bool visit(luci::CirclePad *node)
+ {
+ if (!is_NCHW(node))
+ return false;
+
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->input());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->input(pre_trans);
+
+ auto nchw_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ const auto nhwc_paddings = create_NHWC_paddings(nchw_paddings);
+ node->paddings(nhwc_paddings);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ bool visit(luci::CircleRelu *node) { return convert_unary_features<luci::CircleRelu>(node); }
+
+ bool visit(luci::CircleRelu6 *node) { return convert_unary_features<luci::CircleRelu6>(node); }
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "ConvertNCHWToNHWCPass Start" << std::endl;
+
+ // Annotate NCHW operators
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ switch (circle_node->opcode())
+ {
+ // List of supported Ops
+ case luci::CircleOpcode::CIRCLEINPUT:
+ if (!_preserve_input && !has_data_format(node))
+ {
+ set_data_format(node, DataFormat::NCHW);
+ }
+ break;
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ if (!_preserve_output && !has_data_format(node))
+ {
+ set_data_format(node, DataFormat::NCHW);
+ }
+ break;
+ case luci::CircleOpcode::ADD:
+ case luci::CircleOpcode::CONCATENATION:
+ case luci::CircleOpcode::LEAKY_RELU:
+ case luci::CircleOpcode::MUL:
+ case luci::CircleOpcode::NEG:
+ case luci::CircleOpcode::PAD:
+ case luci::CircleOpcode::RELU:
+ case luci::CircleOpcode::RELU6:
+ if (!has_data_format(node))
+ {
+ set_data_format(node, DataFormat::NCHW);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (!has_data_format(node))
+ {
+ // Unsupported Op
+ continue;
+ }
+ else if (get_data_format(node) == DataFormat::NHWC)
+ {
+ // Already converted to NHWC
+ continue;
+ }
+ else if (has_dynamic_shape(node))
+ {
+ // This pass only works for static-shaped node
+ INFO(l) << "Skip the node with a dynamic shape." << std::endl;
+ continue;
+ }
+ else
+ {
+ ConvertNCHWToNHWC converter;
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->rank() != 4)
+ continue;
+
+ if (circle_node->accept(&converter))
+ {
+ set_data_format(node, DataFormat::NHWC);
+ changed = true;
+ }
+ else
+ {
+ continue;
+ }
+ }
+ }
+
+ INFO(l) << "ConvertNCHWToNHWCPass End" << std::endl;
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Phase.h>
+
+#include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph with a single Op (example: Add).
+ *
+ * BEFORE
+ * - All Ops including Input/Output are NCHW.
+ *
+ * [Input] [beta]
+ * | /
+ * [Add]
+ * |
+ * [Output]
+ *
+ * AFTER
+ * - All Ops including Input/Output are NHWC.
+ *
+ * [Input]
+ * |
+ * [Transpose]
+ * |
+ * [Transpose] [beta]
+ * | /
+ * [Add]
+ * |
+ * [Transpose]
+ * |
+ * [Transpose]
+ * |
+ * [Output]
+ */
+class SimpleGraph
+{
+public:
+ SimpleGraph() = default;
+
+public:
+ void init()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ output = g.nodes()->create<luci::CircleOutput>();
+ input->name("input");
+ output->name("output");
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ graph_input->dtype(loco::DataType::FLOAT32);
+ input->dtype(loco::DataType::FLOAT32);
+ output->dtype(loco::DataType::FLOAT32);
+ graph_output->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ graph_input->shape({1, channel_size, 4, 4});
+ input->shape({1, channel_size, 4, 4});
+ output->shape({1, channel_size, 4, 4});
+ graph_output->shape({1, channel_size, 4, 4});
+
+ auto graph_body = insertGraphBody(input);
+ output->from(graph_body);
+ }
+
+ virtual ~SimpleGraph() = default;
+
+protected:
+ virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class AddGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ add = g.nodes()->create<luci::CircleAdd>();
+ beta = g.nodes()->create<luci::CircleConst>();
+
+ add->dtype(loco::DataType::FLOAT32);
+ beta->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ add->shape({1, channel_size, 4, 4});
+ beta->shape({1, channel_size, 1, 1});
+
+ beta->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ beta->at<loco::DataType::FLOAT32>(i) = i;
+ }
+
+ add->x(input);
+ add->y(beta);
+
+ add->name("add");
+ beta->name("beta");
+
+ return add;
+ }
+
+public:
+ luci::CircleAdd *add = nullptr;
+ luci::CircleConst *beta = nullptr;
+};
+
+class ConcatenationGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ concat = g.nodes()->create<luci::CircleConcatenation>(2);
+ concat->values(0, input);
+ concat->axis(1);
+
+ input2 = g.nodes()->create<luci::CircleConst>();
+ input2->dtype(loco::DataType::FLOAT32);
+ input2->shape({1, 16, 4, 4});
+ input2->size<loco::DataType::FLOAT32>(16 * 4 * 4);
+ for (uint32_t i = 0; i < 16 * 4 * 4; i++)
+ {
+ input2->at<loco::DataType::FLOAT32>(i) = i;
+ }
+ concat->values(1, input2);
+
+ concat->name("concat");
+ input2->name("input2");
+
+ return concat;
+ }
+
+public:
+ luci::CircleConcatenation *concat = nullptr;
+ luci::CircleConst *input2 = nullptr;
+};
+
+class LeakyReluGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ leakyrelu = g.nodes()->create<luci::CircleLeakyRelu>();
+ leakyrelu->features(input);
+ leakyrelu->name("leakyrelu");
+
+ return leakyrelu;
+ }
+
+public:
+ luci::CircleLeakyRelu *leakyrelu = nullptr;
+};
+
+class MulGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ mul = g.nodes()->create<luci::CircleMul>();
+ multiplier = g.nodes()->create<luci::CircleConst>();
+
+ mul->dtype(loco::DataType::FLOAT32);
+ multiplier->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ mul->shape({1, channel_size, 4, 4});
+ multiplier->shape({1, channel_size, 1, 1});
+
+ multiplier->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ multiplier->at<loco::DataType::FLOAT32>(i) = i;
+ }
+
+ mul->x(input);
+ mul->y(multiplier);
+
+ mul->name("mul");
+ multiplier->name("multiplier");
+
+ return mul;
+ }
+
+public:
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *multiplier = nullptr;
+};
+
+class NegGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ neg = g.nodes()->create<luci::CircleNeg>();
+ neg->x(input);
+ neg->name("neg");
+
+ return neg;
+ }
+
+public:
+ luci::CircleNeg *neg = nullptr;
+};
+
+class PadGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ pad = g.nodes()->create<luci::CirclePad>();
+ paddings = g.nodes()->create<luci::CircleConst>();
+
+ pad->dtype(loco::DataType::FLOAT32);
+ paddings->dtype(loco::DataType::S32);
+
+ uint32_t channel_size = 16;
+ pad->shape({1, channel_size, 4, 4});
+ paddings->shape({4, 2});
+
+ // paddings data (NCHW)
+ // [[0,0], [0,0], [1,1], [2,2]]
+ paddings->size<loco::DataType::S32>(8);
+ for (uint32_t dim = 0; dim < 4; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ int32_t data = 0;
+
+ if (dim == 2)
+ data = 1;
+ else if (dim == 3)
+ data = 2;
+
+ paddings->at<loco::DataType::S32>(dim * 2 + i) = data;
+ }
+ }
+
+ pad->input(input);
+ pad->paddings(paddings);
+
+ pad->name("pad");
+ paddings->name("paddings");
+
+ return pad;
+ }
+
+public:
+ luci::CirclePad *pad = nullptr;
+ luci::CircleConst *paddings = nullptr;
+};
+
+class ReluGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu = g.nodes()->create<luci::CircleRelu>();
+ relu->features(input);
+ relu->name("Relu");
+
+ return relu;
+ }
+
+public:
+ luci::CircleRelu *relu = nullptr;
+};
+
+class Relu6Graph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu6 = g.nodes()->create<luci::CircleRelu6>();
+ relu6->features(input);
+ relu6->name("relu6");
+
+ return relu6;
+ }
+
+public:
+ luci::CircleRelu6 *relu6 = nullptr;
+};
+
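+// Checks that `node` is a Transpose with constant perm (0,2,3,1), i.e. NCHW-to-NHWC.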
+void check_pre_trans(loco::Node *node)
+{
+ auto pre_trans = dynamic_cast<luci::CircleTranspose *>(node);
+ EXPECT_NE(nullptr, pre_trans);
+ auto pre_trans_perm = dynamic_cast<luci::CircleConst *>(pre_trans->perm());
+ EXPECT_NE(nullptr, pre_trans_perm);
+ EXPECT_EQ(1, pre_trans_perm->rank());
+ EXPECT_EQ(4, pre_trans_perm->dim(0).value());
+ EXPECT_EQ(loco::DataType::S32, pre_trans_perm->dtype());
+ EXPECT_EQ(0, pre_trans_perm->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, pre_trans_perm->at<loco::DataType::S32>(1));
+ EXPECT_EQ(3, pre_trans_perm->at<loco::DataType::S32>(2));
+ EXPECT_EQ(1, pre_trans_perm->at<loco::DataType::S32>(3));
+}
+
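+// Checks that `node` is a Transpose with constant perm (0,3,1,2), i.e. NHWC-to-NCHW.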
+void check_post_trans(loco::Node *node)
+{
+ auto post_trans = dynamic_cast<luci::CircleTranspose *>(node);
+ EXPECT_NE(nullptr, post_trans);
+ auto post_trans_perm = dynamic_cast<luci::CircleConst *>(post_trans->perm());
+ EXPECT_NE(nullptr, post_trans_perm);
+ EXPECT_EQ(1, post_trans_perm->rank());
+ EXPECT_EQ(4, post_trans_perm->dim(0).value());
+ EXPECT_EQ(loco::DataType::S32, post_trans_perm->dtype());
+ EXPECT_EQ(0, post_trans_perm->at<loco::DataType::S32>(0));
+ EXPECT_EQ(3, post_trans_perm->at<loco::DataType::S32>(1));
+ EXPECT_EQ(1, post_trans_perm->at<loco::DataType::S32>(2));
+ EXPECT_EQ(2, post_trans_perm->at<loco::DataType::S32>(3));
+}
+
+void run_phase(loco::Graph *g, bool preserve_input, bool preserve_output)
+{
+ logo::Phase phase;
+
+ // Default passes.
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ // Pass to test
+ phase.emplace_back(
+ std::make_unique<luci::ConvertNCHWToNHWCPass>(preserve_input, preserve_output));
+
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+ phase_runner.run(phase);
+}
+
+} // namespace
+
+TEST(ConvertNCHWToNHWCPassTest, name)
+{
+ luci::ConvertNCHWToNHWCPass pass(false, false);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ConvertNCHWToNHWC, Add)
+{
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.add->x());
+
+ auto add_succs = loco::succs(g.add);
+ EXPECT_EQ(1, add_succs.size());
+ check_post_trans(*add_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y());
+ EXPECT_NE(nullptr, new_beta);
+ EXPECT_EQ(4, new_beta->rank());
+ EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(1, new_beta->dim(1).value());
+ EXPECT_EQ(1, new_beta->dim(2).value());
+ EXPECT_EQ(channel_size, new_beta->dim(3).value());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Concatenation)
+{
+ ConcatenationGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.concat->values(0));
+ check_pre_trans(g.concat->values(1));
+
+ auto concat_succs = loco::succs(g.concat);
+ EXPECT_EQ(1, concat_succs.size());
+ check_post_trans(*concat_succs.begin());
+
+ // Check concat shape, axis
+ EXPECT_EQ(1, g.concat->dim(0).value());
+ EXPECT_EQ(4, g.concat->dim(1).value());
+ EXPECT_EQ(4, g.concat->dim(2).value());
+ EXPECT_EQ(32, g.concat->dim(3).value());
+ EXPECT_EQ(3, g.concat->axis());
+}
+
+TEST(ConvertNCHWToNHWC, LeakyRelu)
+{
+ LeakyReluGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.leakyrelu->features());
+
+ auto leakyrelu_succs = loco::succs(g.leakyrelu);
+ EXPECT_EQ(1, leakyrelu_succs.size());
+ check_post_trans(*leakyrelu_succs.begin());
+
+ // Check leakyrelu shape
+ EXPECT_EQ(1, g.leakyrelu->dim(0).value());
+ EXPECT_EQ(4, g.leakyrelu->dim(1).value());
+ EXPECT_EQ(4, g.leakyrelu->dim(2).value());
+ EXPECT_EQ(16, g.leakyrelu->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Mul)
+{
+ MulGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.mul->x());
+
+ auto mul_succs = loco::succs(g.mul);
+ EXPECT_EQ(1, mul_succs.size());
+ check_post_trans(*mul_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y());
+ EXPECT_NE(nullptr, new_multiplier);
+ EXPECT_EQ(4, new_multiplier->rank());
+ EXPECT_EQ(1, new_multiplier->dim(0).value());
+ EXPECT_EQ(1, new_multiplier->dim(1).value());
+ EXPECT_EQ(1, new_multiplier->dim(2).value());
+ EXPECT_EQ(channel_size, new_multiplier->dim(3).value());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Neg)
+{
+ NegGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.neg->x());
+
+ auto neg_succs = loco::succs(g.neg);
+ EXPECT_EQ(1, neg_succs.size());
+ check_post_trans(*neg_succs.begin());
+
+  // Check neg shape
+ EXPECT_EQ(1, g.neg->dim(0).value());
+ EXPECT_EQ(4, g.neg->dim(1).value());
+ EXPECT_EQ(4, g.neg->dim(2).value());
+ EXPECT_EQ(16, g.neg->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Pad)
+{
+ PadGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.pad->input());
+
+ auto pad_succs = loco::succs(g.pad);
+ EXPECT_EQ(1, pad_succs.size());
+ check_post_trans(*pad_succs.begin());
+
+ auto new_paddings = dynamic_cast<luci::CircleConst *>(g.pad->paddings());
+ EXPECT_NE(nullptr, new_paddings);
+ EXPECT_EQ(2, new_paddings->rank());
+ EXPECT_EQ(4, new_paddings->dim(0).value());
+ EXPECT_EQ(2, new_paddings->dim(1).value());
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(0));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(1));
+ EXPECT_EQ(1, new_paddings->at<loco::DataType::S32>(2));
+ EXPECT_EQ(1, new_paddings->at<loco::DataType::S32>(3));
+ EXPECT_EQ(2, new_paddings->at<loco::DataType::S32>(4));
+ EXPECT_EQ(2, new_paddings->at<loco::DataType::S32>(5));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(6));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(7));
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Unknown_Shape_NEG)
+{
+ AddGraph g;
+ g.init();
+
+ // Unknown shape
+ g.input->dim(0).unset();
+ g.add->dim(0).unset();
+ g.output->dim(0).unset();
+
+ luci::ConvertNCHWToNHWCPass pass(false, false);
+ EXPECT_EQ(false, pass.run(&g.g));
+}
+
+TEST(ConvertNCHWToNHWC, Preserve_Input_Output)
+{
+ // Preserve input
+ {
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, true, false);
+
+ // Check input shape
+ EXPECT_EQ(1, g.input->dim(0).value());
+ EXPECT_EQ(16, g.input->dim(1).value());
+ EXPECT_EQ(4, g.input->dim(2).value());
+ EXPECT_EQ(4, g.input->dim(3).value());
+
+ // Check output shape
+ EXPECT_EQ(1, g.output->dim(0).value());
+ EXPECT_EQ(4, g.output->dim(1).value());
+ EXPECT_EQ(4, g.output->dim(2).value());
+ EXPECT_EQ(16, g.output->dim(3).value());
+ }
+
+ // Preserve output
+ {
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, false, true);
+
+ // Check input shape
+ EXPECT_EQ(1, g.input->dim(0).value());
+ EXPECT_EQ(4, g.input->dim(1).value());
+ EXPECT_EQ(4, g.input->dim(2).value());
+ EXPECT_EQ(16, g.input->dim(3).value());
+
+ // Check output shape
+ EXPECT_EQ(1, g.output->dim(0).value());
+ EXPECT_EQ(16, g.output->dim(1).value());
+ EXPECT_EQ(4, g.output->dim(2).value());
+ EXPECT_EQ(4, g.output->dim(3).value());
+ }
+
+ // Preserve both input and output
+ {
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ // Check input shape
+ EXPECT_EQ(1, g.input->dim(0).value());
+ EXPECT_EQ(16, g.input->dim(1).value());
+ EXPECT_EQ(4, g.input->dim(2).value());
+ EXPECT_EQ(4, g.input->dim(3).value());
+
+ // Check output shape
+ EXPECT_EQ(1, g.output->dim(0).value());
+ EXPECT_EQ(16, g.output->dim(1).value());
+ EXPECT_EQ(4, g.output->dim(2).value());
+ EXPECT_EQ(4, g.output->dim(3).value());
+ }
+}
+
+TEST(ConvertNCHWToNHWC, Relu)
+{
+ ReluGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu->features());
+
+ auto relu_succs = loco::succs(g.relu);
+ EXPECT_EQ(1, relu_succs.size());
+ check_post_trans(*relu_succs.begin());
+
+ // Check relu shape
+ EXPECT_EQ(1, g.relu->dim(0).value());
+ EXPECT_EQ(4, g.relu->dim(1).value());
+ EXPECT_EQ(4, g.relu->dim(2).value());
+ EXPECT_EQ(16, g.relu->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Relu6)
+{
+ Relu6Graph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu6->features());
+
+ auto relu6_succs = loco::succs(g.relu6);
+ EXPECT_EQ(1, relu6_succs.size());
+ check_post_trans(*relu6_succs.begin());
+
+ // Check relu6 shape
+ EXPECT_EQ(1, g.relu6->dim(0).value());
+ EXPECT_EQ(4, g.relu6->dim(1).value());
+ EXPECT_EQ(4, g.relu6->dim(2).value());
+ EXPECT_EQ(16, g.relu6->dim(3).value());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldAddV2Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <iostream>
+
+namespace
+{
+
+bool same_shape(const luci::CircleConst *x, const luci::CircleConst *y)
+{
+ if (x->rank() != y->rank())
+ return false;
+
+ for (uint32_t i = 0; i < x->rank(); i++)
+ {
+ if (!(x->dim(i) == y->dim(i)))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Fold AddV2 to const if both inputs are const
+ **/
+template <loco::DataType T> bool fold_add_v2(luci::CircleCustom *add_v2)
+{
+ // This should hold for AddV2
+ if (add_v2->numInputs() != 2)
+ return false;
+
+ // Check first input is const
+ auto x = dynamic_cast<luci::CircleConst *>(add_v2->inputs(0));
+ if (not x)
+ return false;
+
+ // Check second input is const
+ auto y = dynamic_cast<luci::CircleConst *>(add_v2->inputs(1));
+ if (not y)
+ return false;
+
+ if (x->dtype() != y->dtype())
+ return false;
+
+ if (!same_shape(x, y))
+ return false;
+
+ auto name_x = x->name();
+ auto name_y = y->name();
+ assert(name_x.length() > 0);
+ assert(name_y.length() > 0);
+ auto constant = add_v2->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(x->dtype());
+ constant->rank(x->rank());
+ for (uint32_t i = 0; i < x->rank(); i++)
+ constant->dim(i).set(x->dim(i).value());
+
+ const auto size = x->size<T>();
+ constant->size<T>(size);
+ for (uint32_t i = 0; i < size; i++)
+ constant->at<T>(i) = x->at<T>(i) + y->at<T>(i);
+
+ constant->shape_status(luci::ShapeStatus::VALID);
+ constant->name(name_x + ";" + name_y);
+
+ for (auto succ : loco::succs(add_v2))
+ {
+ auto custom_out = loco::must_cast<luci::CircleCustomOut *>(succ);
+ loco::replace(custom_out).with(constant);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for AddV2 Op
+ **/
+bool FoldAddV2Pass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto custom = dynamic_cast<luci::CircleCustom *>(node))
+ {
+ if (custom->custom_code() == "AddV2")
+ {
+ // TODO: Support more data types
+ if (custom->dtype() == loco::DataType::S64)
+ {
+ if (fold_add_v2<loco::DataType::S64>(custom))
+ changed = true;
+ }
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldAddV2Pass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph has an AddV2 Op with constant inputs
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * [CircleCustom (AddV2)]
+ * |
+ * [CircleCustomOut]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ */
+template <loco::DataType T> class FoldAddV2Test : public luci::ConstantFoldingAddTestGraph
+{
+public:
+ FoldAddV2Test(std::initializer_list<uint32_t> shape) : luci::ConstantFoldingAddTestGraph(shape, T)
+ {
+ _addV2 = _g.nodes()->create<luci::CircleCustom>(2, 1);
+ _x = _g.nodes()->create<luci::CircleConst>();
+ _y = _g.nodes()->create<luci::CircleConst>();
+ _addV2_out = _g.nodes()->create<luci::CircleCustomOut>();
+
+ _addV2->dtype(T);
+ _x->dtype(T);
+ _y->dtype(T);
+ _addV2_out->dtype(T);
+
+ _addV2->shape(shape);
+ _x->shape(shape);
+ _y->shape(shape);
+ _addV2_out->shape(shape);
+
+ uint32_t num_elems = 1;
+ for (auto dim = shape.begin(); dim != shape.end(); dim++)
+ num_elems *= *dim;
+
+ _x->size<T>(num_elems);
+ _y->size<T>(num_elems);
+
+ for (uint32_t i = 0; i < num_elems; i++)
+ {
+ _x->at<T>(i) = i + 1;
+ _y->at<T>(i) = i + 1;
+ }
+
+ _addV2->custom_code("AddV2");
+ _addV2->inputs(0, _x);
+ _addV2->inputs(1, _y);
+ _addV2_out->input(_addV2);
+
+ _addV2->name("addV2");
+ _x->name("x");
+ _y->name("y");
+ }
+
+ loco::Node *createFoldedPattern() override { return _addV2_out; }
+
+ virtual ~FoldAddV2Test() = default;
+
+protected:
+ luci::CircleCustom *_addV2 = nullptr;
+ luci::CircleCustomOut *_addV2_out = nullptr;
+ luci::CircleConst *_x = nullptr;
+ luci::CircleConst *_y = nullptr;
+};
+
+class FoldS64AddV2Test : public FoldAddV2Test<loco::DataType::S64>, public ::testing::Test
+{
+public:
+ FoldS64AddV2Test() : FoldAddV2Test<loco::DataType::S64>({3}) {}
+
+ virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST(FoldAddV2PassTest, name)
+{
+ luci::FoldAddV2Pass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldS64AddV2Test, fold_addV2)
+{
+ luci::FoldAddV2Pass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::S64, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(3, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(0));
+ EXPECT_EQ(4, folded_const->at<loco::DataType::S64>(1));
+ EXPECT_EQ(6, folded_const->at<loco::DataType::S64>(2));
+}
+
+TEST_F(FoldS64AddV2Test, input_type_mismatch_NEG)
+{
+ _x->dtype(loco::DataType::S32);
+
+ luci::FoldAddV2Pass pass;
+ EXPECT_FALSE(pass.run(graph()));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldCastPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
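+// Creates a constant of `to_dtype` with the same shape as `node`, casting each element.
+// Only S64 -> S32 is supported for now; other combinations return nullptr.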
+luci::CircleConst *cast_const(luci::CircleConst *node, loco::DataType from_dtype,
+ loco::DataType to_dtype)
+{
+ assert(node->dtype() == from_dtype);
+
+ auto name = node->name();
+ assert(name.length() > 0);
+ auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(to_dtype);
+ constant->rank(node->rank());
+ uint32_t num_elems = 1;
+ for (uint32_t i = 0; i < node->rank(); i++)
+ {
+ constant->dim(i).set(node->dim(i).value());
+ num_elems *= node->dim(i).value();
+ }
+
+ constant->shape_status(luci::ShapeStatus::VALID);
+
+ // TODO: Support more data types
+ if (from_dtype == loco::DataType::S64)
+ {
+ if (to_dtype == loco::DataType::S32)
+ {
+ constant->size<loco::DataType::S32>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ constant->at<loco::DataType::S32>(i) =
+ static_cast<int32_t>(node->at<loco::DataType::S64>(i));
+
+ constant->name(name + "_S32");
+ return constant;
+ }
+ return nullptr;
+ }
+
+ return nullptr;
+}
+
+/**
+ * Fold Cast to const if it has const input
+ **/
+bool fold_cast(luci::CircleCast *cast)
+{
+ // Check cast has const input
+ auto const_x = dynamic_cast<luci::CircleConst *>(cast->x());
+ if (not const_x)
+ return false;
+
+ const auto in_dtype = const_x->dtype();
+ const auto out_dtype = cast->dtype();
+
+ auto casted_const = cast_const(const_x, in_dtype, out_dtype);
+ if (not casted_const)
+ return false;
+
+ loco::replace(cast).with(casted_const);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for Cast Op
+ **/
+bool FoldCastPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto cast = dynamic_cast<luci::CircleCast *>(node))
+ {
+ if (fold_cast(cast))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldCastPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+template <loco::DataType FromT, loco::DataType ToT>
+class FoldCastTest : public luci::ConstantFoldingAddTestGraph
+{
+public:
+ FoldCastTest(std::initializer_list<uint32_t> shape)
+ : luci::ConstantFoldingAddTestGraph(shape, ToT)
+ {
+ _cast = _g.nodes()->create<luci::CircleCast>();
+ _x = _g.nodes()->create<luci::CircleConst>();
+
+ _cast->dtype(ToT);
+ _x->dtype(FromT);
+
+ _cast->shape(shape);
+ _x->shape(shape);
+
+ uint32_t num_elems = 1;
+ for (auto dim = shape.begin(); dim != shape.end(); dim++)
+ num_elems *= *dim;
+
+ _x->size<FromT>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ _x->at<FromT>(i) = i + 1;
+
+ _cast->x(_x);
+
+ _cast->name("cast");
+ _x->name("x");
+ }
+
+ loco::Node *createFoldedPattern() override { return _cast; }
+
+protected:
+ luci::CircleCast *_cast = nullptr;
+ luci::CircleConst *_x = nullptr;
+};
+
+/**
+ * Graph that has a Cast Op with constant input
+ *
+ * BEFORE
+ *
+ * [CircleConst]
+ * |
+ * [Cast]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ *
+ */
+class FoldS64ToS32CastTest : public FoldCastTest<loco::DataType::S64, loco::DataType::S32>,
+ public ::testing::Test
+{
+public:
+ FoldS64ToS32CastTest() : FoldCastTest<loco::DataType::S64, loco::DataType::S32>({3}) {}
+
+ virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST(FoldCastPassTest, name)
+{
+ luci::FoldCastPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldS64ToS32CastTest, fold_cast_s64_to_s32)
+{
+ luci::FoldCastPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::S32, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(3, folded_const->dim(0).value());
+ EXPECT_EQ(1, folded_const->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S32>(1));
+ EXPECT_EQ(3, folded_const->at<loco::DataType::S32>(2));
+}
#include "luci/Pass/FoldDequantizePass.h"
#include <luci/IR/CircleNodes.h>
-
-#include <loco/Service/TypeInference.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
throw std::runtime_error("Given constant node has no quantization parameter");
}
+ auto name = const_node->name();
+ assert(name.length() > 0);
auto g = const_node->graph();
auto new_const_node = g->nodes()->create<luci::CircleConst>();
}
new_const_node->size<loco::DataType::FLOAT32>(dim_size);
new_const_node->shape_status(luci::ShapeStatus::VALID);
+ new_const_node->name(name + "_DQ");
const int32_t q_dim = const_node->quantparam()->quantized_dimension;
const int32_t q_dim_value = const_node->dim(q_dim).value();
qd = 0;
new_const_node->at<loco::DataType::FLOAT32>(i) =
- (float)(const_node->at<loco::DataType::S8>(i) - const_node->quantparam()->zerop.at(qd)) *
- const_node->quantparam()->scale.at(qd);
+ (float)(const_node->at<loco::DataType::S8>(i) - const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
}
}
else
qd = 0;
new_const_node->at<loco::DataType::FLOAT32>(i) =
- (float)((int)const_node->at<loco::DataType::U8>(i) -
- const_node->quantparam()->zerop.at(qd)) *
- const_node->quantparam()->scale.at(qd);
+ (float)((int)const_node->at<loco::DataType::U8>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
}
}
if (replace_const_node(const_node_user, const_node))
{
loco::replace(dequant).with(const_node_user);
+ luci::add_origin(loco::must_cast<luci::CircleNode *>(const_node_user),
+ luci::get_origin(dequant));
changed = true;
}
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDequantizePass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FoldDequantizePassTest, name)
+{
+ luci::FoldDequantizePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldSparseToDensePass.h"
+#include "CircleOptimizerUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * Fold to const if
+ *
+ * 1. indices has 0-sized static shape such as [0]
+ * (i.e., output is filled with default value)
+ * 2. default_value: const scalar
+ * 3. output_shape: const
+ *
+ * TODO: Support more general patterns
+ **/
+template <loco::DataType IndexT, loco::DataType ValueT>
+bool fold_sparse_to_dense(luci::CircleSparseToDense *stod)
+{
+ const auto indices = loco::must_cast<luci::CircleNode *>(stod->indices());
+ const auto default_value = loco::must_cast<luci::CircleConst *>(stod->default_value());
+ const auto output_shape = loco::must_cast<luci::CircleConst *>(stod->output_shape());
+
+ bool has_zero = false;
+ for (uint32_t i = 0; i < indices->rank(); i++)
+ {
+ if (indices->dim(i).known() && indices->dim(i).value() == 0)
+ has_zero = true;
+ }
+ if (!has_zero)
+ return false;
+
+ if (default_value->rank() != 0 || default_value->size<ValueT>() != 1)
+ return false;
+
+ auto rank = output_shape->size<IndexT>();
+ std::vector<uint32_t> shape;
+ for (uint32_t i = 0; i < rank; i++)
+ {
+ auto dim = output_shape->at<IndexT>(i);
+ assert(dim >= 0 && dim <= std::numeric_limits<uint32_t>::max());
+ if (!(dim >= 0 && dim <= std::numeric_limits<uint32_t>::max()))
+ return false;
+
+ shape.push_back(dim);
+ }
+
+ auto name = stod->name();
+ assert(name.length() > 0);
+ auto constant = stod->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(default_value->dtype());
+ constant->rank(rank);
+ uint32_t dim_size = 1;
+ for (uint32_t i = 0; i < rank; i++)
+ {
+ constant->dim(i).set(shape[i]);
+ dim_size *= shape[i];
+ }
+
+ constant->size<ValueT>(dim_size);
+ const auto value = default_value->scalar<ValueT>();
+ for (uint32_t i = 0; i < dim_size; i++)
+ constant->at<ValueT>(i) = value;
+
+ constant->shape_status(luci::ShapeStatus::VALID);
+ constant->name(name + "_D");
+
+ loco::replace(stod).with(constant);
+
+ return true;
+}
+
+bool fold_sparse_to_dense(luci::CircleSparseToDense *stod)
+{
+ auto indices = loco::must_cast<luci::CircleNode *>(stod->indices());
+ auto default_value = dynamic_cast<luci::CircleConst *>(stod->default_value());
+ if (not default_value)
+ return false;
+
+ auto output_shape = dynamic_cast<luci::CircleConst *>(stod->output_shape());
+ if (not output_shape)
+ return false;
+
+ // Illegal input check
+ if (indices->dtype() != output_shape->dtype())
+ throw std::runtime_error("indices and output_shape of SparseToDense must have the same dtype");
+
+ // TODO: Support more data types
+ if (indices->dtype() == loco::DataType::S64)
+ {
+ if (default_value->dtype() == loco::DataType::S64)
+ {
+ return fold_sparse_to_dense<loco::DataType::S64, loco::DataType::S64>(stod);
+ }
+ }
+ return false;
+}
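+// Supporting another dtype combination is one more dispatch branch, e.g.
+// (sketch only, not enabled by this pass yet):
+//   if (indices->dtype() == loco::DataType::S32 &&
+//       default_value->dtype() == loco::DataType::S32)
+//     return fold_sparse_to_dense<loco::DataType::S32, loco::DataType::S32>(stod);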
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for SparseToDense Op
+ **/
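+// A typical driver runs the pass until it reaches a fixed point, e.g. (sketch):
+//   luci::FoldSparseToDensePass pass;
+//   while (pass.run(graph))
+//     ; // repeat while at least one SparseToDense node was folded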
+bool FoldSparseToDensePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto stod = dynamic_cast<luci::CircleSparseToDense *>(node))
+ {
+ if (fold_sparse_to_dense(stod))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldSparseToDensePass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph that has a SparseToDense Op with zero-sized indices
+ *
+ * BEFORE
+ * - shape of indices: [0,1]
+ * - output_shape: [3]
+ * - default_value: scalar 2
+ *
+ * [indices] [output_shape] [values] [default_value]
+ * | | | |
+ * +------[SparseToDense]------+
+ *
+ * AFTER
+ *
+ * [Const] (shape: [3], values: [2, 2, 2])
+ *
+ */
+class S64SparseToDenseZeroIndicesTest : public luci::ConstantFoldingAddTestGraph,
+ public ::testing::Test
+{
+public:
+ S64SparseToDenseZeroIndicesTest() : luci::ConstantFoldingAddTestGraph({3}, loco::DataType::S64) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ _stod = _g.nodes()->create<luci::CircleSparseToDense>();
+ _indices = _g.nodes()->create<luci::CircleConst>();
+ _output_shape = _g.nodes()->create<luci::CircleConst>();
+ _values = _g.nodes()->create<luci::CircleConst>();
+ _default_value = _g.nodes()->create<luci::CircleConst>();
+
+ _stod->dtype(loco::DataType::S64);
+ _indices->dtype(loco::DataType::S64);
+ _output_shape->dtype(loco::DataType::S64);
+ _values->dtype(loco::DataType::S64);
+ _default_value->dtype(loco::DataType::S64);
+
+ _indices->shape({0, 1});
+ _output_shape->shape({1});
+ _values->shape({0});
+ _default_value->rank(0);
+
+ _indices->size<loco::DataType::S64>(0);
+ _output_shape->size<loco::DataType::S64>(1);
+ _output_shape->at<loco::DataType::S64>(0) = 3;
+ _values->size<loco::DataType::S64>(0);
+ _default_value->size<loco::DataType::S64>(1);
+ _default_value->at<loco::DataType::S64>(0) = 2;
+
+ _stod->indices(_indices);
+ _stod->output_shape(_output_shape);
+ _stod->values(_values);
+ _stod->default_value(_default_value);
+
+ _stod->name("stod");
+ _indices->name("indices");
+ _output_shape->name("output_shape");
+ _values->name("values");
+ _default_value->name("default_value");
+
+ return _stod;
+ }
+
+protected:
+ luci::CircleSparseToDense *_stod = nullptr;
+ luci::CircleConst *_indices = nullptr;
+ luci::CircleConst *_output_shape = nullptr;
+ luci::CircleConst *_values = nullptr;
+ luci::CircleConst *_default_value = nullptr;
+};
+
+} // namespace
+
+TEST(FoldSparseToDensePassTest, name)
+{
+ luci::FoldSparseToDensePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(S64SparseToDenseZeroIndicesTest, fold_stod_with_zero_indices)
+{
+ luci::FoldSparseToDensePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, and values of the folded const
+ EXPECT_EQ(loco::DataType::S64, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(3, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(0));
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(1));
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(2));
+}
+
+TEST_F(S64SparseToDenseZeroIndicesTest, illegal_input_NEG)
+{
+ _indices->dtype(loco::DataType::S32);
+
+ luci::FoldSparseToDensePass pass;
+ EXPECT_ANY_THROW(pass.run(graph()));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Service/Nodes/CircleConst.h>
+
+namespace
+{
+
+luci::CircleReshape *as_reshape(loco::Node *node)
+{
+ return dynamic_cast<luci::CircleReshape *>(node);
+}
+
+luci::CircleConst *clone_shape(luci::CircleReshape *reshape)
+{
+ const auto shape = dynamic_cast<luci::CircleConst *>(reshape->shape());
+ // only support CircleConst for now
+ if (shape == nullptr)
+ return nullptr;
+
+ // NOTE tflite and circle only support S32
+ // TODO just check with assert() after import handles this
+ auto dtype = shape->dtype();
+ if (dtype != loco::DataType::S32)
+ return nullptr;
+
+ return luci::clone(shape);
+}
+
+void copy_shape(luci::CircleReshape *reshape, luci::CircleReshape *new_reshape)
+{
+ auto ns_rank = reshape->newShape()->rank();
+ new_reshape->newShape()->rank(ns_rank);
+ for (uint32_t r = 0; r < ns_rank; ++r)
+ new_reshape->newShape()->dim(r) = reshape->newShape()->dim(r);
+}
+
+bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg)
+{
+ assert(reshape != nullptr);
+ assert(neg != nullptr);
+
+ luci::CircleConst *cloned_shape = clone_shape(reshape);
+ if (cloned_shape == nullptr)
+ return false;
+
+ auto name = reshape->name();
+ assert(name.length() > 0);
+ loco::Graph *graph = neg->graph();
+ // create reshape placed after neg
+ luci::CircleReshape *new_reshape = graph->nodes()->create<luci::CircleReshape>();
+ copy_shape(reshape, new_reshape);
+ new_reshape->shape(cloned_shape);
+ new_reshape->name(name + "_C");
+ luci::add_origin(new_reshape, luci::get_origin(reshape));
+
+ // reconnect network
+ loco::replace(neg).with(new_reshape);
+ neg->x(reshape->tensor());
+ new_reshape->tensor(neg);
+
+ // Do shape inference for this node again.
+ neg->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ return true;
+}
+
+class ForwardReshape final : public luci::CircleNodeMutableVisitor<bool>
+{
+protected:
+ bool visit(luci::CircleNode *node)
+ {
+ LOGGER(l);
+ INFO(l) << "ForwardReshape: Unsupported operator: " << node->name() << std::endl;
+ return false;
+ }
+
+ bool visit(luci::CircleNeg *node)
+ {
+ auto reshape = as_reshape(node->x());
+ if (reshape == nullptr)
+ return false;
+ return forward_reshape(reshape, node);
+ }
+
+ // TODO add more unary operators
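+ // For example, supporting CircleAbs would add the following overload
+ // (sketch only, and it needs a matching forward_reshape() overload):
+ //   bool visit(luci::CircleAbs *node)
+ //   {
+ //     auto reshape = as_reshape(node->x());
+ //     if (reshape == nullptr)
+ //       return false;
+ //     return forward_reshape(reshape, node);
+ //   }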
+};
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * | /
+ * [CircleReshape]
+ * / |
+ * [CircleNode] [(UnaryOp)]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * UnaryOp: CircleNeg, ...
+ *
+ * AFTER
+ * |
+ * [CircleConst] [CircleNode]
+ * | / |
+ * [CircleReshape] [(UnaryOp)] [CircleConst]
+ * | | /
+ * [CircleNode] [CircleReshape]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * Note: a new [CircleReshape] is added after [(UnaryOp)]
+ */
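+// Example (this mirrors the unit test): Input -> Reshape(to [2, 4]) -> Neg
+// becomes Input -> Neg -> Reshape(to [2, 4]), i.e. the Reshape is forwarded
+// past the unary op so the op keeps working on the original layout.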
+bool ForwardReshapeToUnaryOpPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ ForwardReshape forward;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->accept(&forward))
+ changed = true;
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+namespace
+{
+
+using namespace luci::test;
+
+class ReshapeNegGraphlet
+{
+public:
+ ReshapeNegGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ std::vector<uint32_t> shape_out_v = shape_out;
+
+ _reshape_shape = g->nodes()->create<luci::CircleConst>();
+ _reshape = g->nodes()->create<luci::CircleReshape>();
+ _neg = g->nodes()->create<luci::CircleNeg>();
+
+ _reshape_shape->dtype(loco::DataType::S32);
+ _reshape_shape->rank(1);
+ _reshape_shape->dim(0).set(shape_out_v.size());
+ _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+ // values
+ const auto size = shape_out_v.size();
+ _reshape_shape->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ _reshape_shape->at<loco::DataType::S32>(i) = shape_out_v[i];
+
+ _reshape_shape->name("reshape_shape");
+ _reshape->name("reshape");
+ _neg->name("neg");
+ }
+
+protected:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleNeg *_neg = nullptr;
+ luci::CircleConst *_reshape_shape = nullptr;
+};
+
+class ForwardReshapeToNegGraph : public TestIOGraph, public ReshapeNegGraphlet
+{
+public:
+ ForwardReshapeToNegGraph() = default;
+
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ ReshapeNegGraphlet::init(g(), shape_in, shape_out);
+
+ // connect network
+ _reshape->tensor(input());
+ _reshape->shape(_reshape_shape);
+ _neg->x(_reshape);
+
+ output()->from(_neg);
+ }
+};
+
+class ForwardReshapeToNegGraphTest : public ::testing::Test
+{
+public:
+ ForwardReshapeToNegGraphTest() = default;
+
+ void run_pass(void)
+ {
+ while (_pass.run(_graph.g()))
+ ;
+ }
+
+protected:
+ ForwardReshapeToNegGraph _graph;
+ luci::ForwardReshapeToUnaryOpPass _pass;
+};
+
+} // namespace
+
+TEST(ForwardReshapeToUnaryOpPassTest, name)
+{
+ luci::ForwardReshapeToUnaryOpPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(ForwardReshapeToNegGraphTest, simple_forward)
+{
+ _graph.init({2, 2, 2}, {2, 4});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto neg = dynamic_cast<luci::CircleNeg *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, neg);
+ neg = dynamic_cast<luci::CircleNeg *>(reshape->tensor());
+ ASSERT_NE(nullptr, neg);
+}
#include "luci/Pass/FuseActivationFunctionPass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeMixins.h>
#include <luci/IR/CircleOpcode.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace luci
{
return false;
auto node_with_fused_act =
- dynamic_cast<luci::LuciNodeMixin<luci::LuciNodeTrait::FusedActFunc> *>(pred_node);
+ dynamic_cast<luci::CircleNodeMixin<luci::CircleNodeTrait::FusedActFunc> *>(pred_node);
if (node_with_fused_act == nullptr)
return false;
+ // TODO remove this work-around
+ // This skips fusion for Concatenation, as luci-interpreter doesn't support it yet
+ if (dynamic_cast<luci::CircleConcatenation *>(pred_node) != nullptr)
+ return false;
+
auto fused_act = node_with_fused_act->fusedActivationFunction();
luci::FusedActFunc target_func = luci::FusedActFunc::UNDEFINED;
return false;
node_with_fused_act->fusedActivationFunction(target_func);
+ luci::add_origin(pred_node, luci::get_origin(node));
loco::replace(node).with(pred_node);
node->drop();
* limitations under the License.
*/
-#include "FuseActivationFunctionPassInternal.h"
+#include "luci/Pass/FuseActivationFunctionPass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/test/TestIOGraph.h>
+
#include <gtest/gtest.h>
namespace
{
+using namespace luci::test;
+
/**
* Simple graph for test
*
* [Conv2]
*
*/
-class SimpleGraph
+class ConvReluConvGraphlet
+{
+public:
+ ConvReluConvGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _conv1 = g->nodes()->create<luci::CircleConv2D>();
+ _conv2 = g->nodes()->create<luci::CircleConv2D>();
+ _relu = g->nodes()->create<luci::CircleRelu>();
+ _conv1_f = g->nodes()->create<luci::CircleConst>();
+ _conv1_b = g->nodes()->create<luci::CircleConst>();
+ _conv2_f = g->nodes()->create<luci::CircleConst>();
+ _conv2_b = g->nodes()->create<luci::CircleConst>();
+
+ _conv1->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ _conv1->name("conv1");
+ _conv2->name("conv2");
+ _relu->name("relu");
+ _conv1_f->name("conv1f");
+ _conv1_b->name("conv1b");
+ _conv2_f->name("conv2f");
+ _conv2_b->name("conv2b");
+ }
+
+public:
+ luci::CircleRelu *relu() { return _relu; }
+ luci::CircleConv2D *conv1() { return _conv1; }
+ luci::CircleConv2D *conv2() { return _conv2; }
+
+protected:
+ luci::CircleConv2D *_conv1 = nullptr;
+ luci::CircleConv2D *_conv2 = nullptr;
+ luci::CircleRelu *_relu = nullptr;
+ luci::CircleConst *_conv1_f = nullptr;
+ luci::CircleConst *_conv1_b = nullptr;
+ luci::CircleConst *_conv2_f = nullptr;
+ luci::CircleConst *_conv2_b = nullptr;
+};
+
+class FuseActTestGraph : public TestIOGraph, public ConvReluConvGraphlet
{
public:
- SimpleGraph()
+ FuseActTestGraph() = default;
+
+ void init(void)
{
- conv1 = g.nodes()->create<luci::CircleConv2D>();
- conv2 = g.nodes()->create<luci::CircleConv2D>();
- relu = g.nodes()->create<luci::CircleRelu>();
+ TestIOGraph::init({1}, {1});
+ ConvReluConvGraphlet::init(g());
- conv1->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _conv1->input(input());
+ _conv1->filter(_conv1_f);
+ _conv1->bias(_conv1_b);
- relu->features(conv1);
- conv2->input(relu);
+ _relu->features(_conv1);
+
+ _conv2->input(_relu);
+ _conv2->filter(_conv2_f);
+ _conv2->bias(_conv2_b);
+
+ output()->from(_conv2);
}
+};
+class ConvHasMultiSuccGraph : public TestIOGraph, public ConvReluConvGraphlet
+{
public:
- loco::Graph g;
- luci::CircleConv2D *conv1;
- luci::CircleConv2D *conv2;
- luci::CircleRelu *relu;
+ ConvHasMultiSuccGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ ConvReluConvGraphlet::init(g());
+
+ _conv1->input(input());
+ _conv1->filter(_conv1_f);
+ _conv1->bias(_conv1_b);
+
+ _relu->features(_conv1);
+
+ _conv2->input(_conv1);
+ _conv2->filter(_conv2_f);
+ _conv2->bias(_conv2_b);
+
+ output()->from(_relu); // We need to check from relu
+ }
};
+// TODO use ::testing::Test
+
} // namespace
+TEST(FuseActivationFunctionPassTest, name)
+{
+ luci::FuseActivationFunctionPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(FusePreActivationBatchNorm, fuse_activation_function)
{
- SimpleGraph g;
+ FuseActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
- EXPECT_TRUE(luci::fuse_activation_function(g.relu));
+ g.init();
- EXPECT_EQ(g.conv1, g.conv2->input());
+ EXPECT_TRUE(pass.run(g.g()));
+ EXPECT_EQ(g.conv1(), g.conv2()->input());
}
TEST(FusePreActivationBatchNorm, fuse_activation_function_dup_relu)
{
- SimpleGraph g;
- g.conv1->fusedActivationFunction(luci::FusedActFunc::RELU);
+ FuseActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
- EXPECT_TRUE(luci::fuse_activation_function(g.relu));
+ g.init();
+ g.conv1()->fusedActivationFunction(luci::FusedActFunc::RELU);
- EXPECT_EQ(g.conv1, g.conv2->input());
+ EXPECT_TRUE(pass.run(g.g()));
+ EXPECT_EQ(g.conv1(), g.conv2()->input());
}
-TEST(FusePreActivationBatchNorm, fuse_activation_function_NEG)
+TEST(FusePreActivationBatchNorm, fuse_activation_function_mulsucc_NEG)
{
- SimpleGraph g;
- g.conv2->input(g.conv1);
+ ConvHasMultiSuccGraph g;
+ luci::FuseActivationFunctionPass pass;
+
+ g.init();
- // Conv1 has multiple successors
- EXPECT_FALSE(luci::fuse_activation_function(g.relu));
+ // The Conv2D feeding Relu has multiple successors
+ EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FusePreActivationBatchNorm, fuse_activation_function_tanh_NEG)
+{
+ FuseActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
- g.conv2->input(g.relu);
- g.conv1->fusedActivationFunction(luci::FusedActFunc::TANH);
+ g.init();
+ g.conv1()->fusedActivationFunction(luci::FusedActFunc::TANH);
- // Conv1 already has activation function
- EXPECT_FALSE(luci::fuse_activation_function(g.relu));
+ // The Conv2D feeding Relu already has an activation function
+ EXPECT_FALSE(pass.run(g.g()));
}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_CIRCLE_FUSE_ACTIVATION_FUNCTION_PASS_INTERNAL_H__
-#define __LUCI_CIRCLE_FUSE_ACTIVATION_FUNCTION_PASS_INTERNAL_H__
-
-#include <luci/IR/CircleNodes.h>
-
-namespace luci
-{
-
-// Fuse activation function with preceding Op
-/// @return true if success
-bool fuse_activation_function(luci::CircleNode *node);
-
-} // namespace luci
-
-#endif // __LUCI_CIRCLE_FUSE_ACTIVATION_FUNCTION_PASS_INTERNAL_H__
#include "luci/Pass/FuseAddWithTConvPass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
/**
- * Fuse add to TCONV if possible
+ * Fuse Add to TransposeConv if possible
*
* BEFORE
- *
- * [CircleTransposeConv]
+ * |
+ * [CircleConst] [CircleTransposeConv]
+ * \ |
+ * [CircleAdd]
* |
- * [add]
+ *
* AFTER
+ * |
+ * [CircleConst] |
+ * \ |
+ * [CircleTransposeConv] [CircleAdd]
+ * |
+ * ([CircleRelu6])
+ * |
*
- * [CircleTransposeConv]
+ * Note: CircleRelu6 is inserted if Add activation is ReLU6
*/
bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv)
{
if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
{
+ auto name = addition->name();
+ assert(name.length() > 0);
// separate relu op from add op
auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
relu->features(tconv);
+ relu->name(name + "/Relu6");
+ luci::add_origin(relu, luci::get_origin(add));
// remove add node
replace(add).with(relu);
replace(add).with(tconv);
}
+ // set origin
+ luci::add_origin(tconv, luci::get_origin(add));
+
return true;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddWithTConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseAddWithTConvPassTest, name)
+{
+ luci::FuseAddWithTConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
#include "luci/Pass/FuseBCQPass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Log.h>
#include <cassert>
{
public:
BCQFuser<1>(int32_t original_output_cnt, int32_t bundle_cnt)
- : _original_output_cnt{original_output_cnt}, _bundle_cnt{bundle_cnt}
+ : _original_output_cnt{original_output_cnt}, _bundle_cnt{bundle_cnt}
{
// Do nothing
}
{
const auto prefix = (output_node->index() - (_original_output_cnt + 1)) / (_bundle_cnt);
const MetadataType metadata_type = static_cast<MetadataType>(
- (output_node->index() - (_original_output_cnt + 1)) % (_bundle_cnt));
+ (output_node->index() - (_original_output_cnt + 1)) % (_bundle_cnt));
const auto circle_node = loco::must_cast<luci::CircleNode *>(output_node->from());
add_BCQ_info_node(prefix, metadata_type, circle_node);
}
if (prefix == -1 || !is_valid_prefix(prefix))
continue;
+ auto name = gather->name();
+ assert(name.length() > 0);
+
auto bcq_gather = g->nodes()->create<luci::CircleBCQGather>();
+ luci::add_origin(bcq_gather, luci::get_origin(gather));
bcq_gather->op_version(1);
bcq_gather->input_scales(alpha(g, prefix));
bcq_gather->input_binary(packed_binary_code(g, prefix));
bcq_gather->indices(gather->indices());
bcq_gather->input_clusters(packed_clusters(g, prefix));
+ bcq_gather->name(name + "/BCQGather");
if (_do_w_x[prefix]->at<loco::DataType::BOOL>(0))
{
bcq_gather->axis(axis_transpose);
const auto indices_rank =
- loco::must_cast<luci::CircleNode *>(gather->indices())->rank();
+ loco::must_cast<luci::CircleNode *>(gather->indices())->rank();
auto perm = g->nodes()->create<luci::CircleConst>();
perm->dtype(loco::DataType::S32);
perm->at<loco::DataType::S32>(idx) = idx + 1;
perm->at<loco::DataType::S32>(indices_rank) = 0;
perm->shape_status(luci::ShapeStatus::VALID);
+ perm->name(name + "/Transpose/perm");
auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(output_transpose, luci::get_origin(gather));
output_transpose->a(bcq_gather);
output_transpose->perm(perm);
+ output_transpose->name(name + "/Transpose");
loco::replace(gather).with(output_transpose);
}
if (prefix == -1 || !is_valid_prefix(prefix))
continue;
+ auto name = fully_connected->name();
+ assert(name.length() > 0);
+
auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ luci::add_origin(bcq_fc, luci::get_origin(fully_connected));
bcq_fc->op_version(1);
bcq_fc->weights_scales(alpha(g, prefix));
bcq_fc->bias(fully_connected->bias());
bcq_fc->weights_clusters(packed_clusters(g, prefix));
bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
+ bcq_fc->name(name + "/BCQFullyConnected");
loco::Node *bcq_input = fully_connected->input();
new_shape->rank(1);
new_shape->dim(0) = 2;
- auto batch_size = 1;
- for (uint32_t i = 0; i < original_input->rank() - 1; ++i)
- batch_size *= original_input->dim(i).value();
-
- new_shape->at<loco::DataType::S32>(0) = batch_size;
- new_shape->at<loco::DataType::S32>(1) =
- original_input->dim(original_input->rank() - 1).value();
+ new_shape->at<loco::DataType::S32>(0) = -1;
+ new_shape->at<loco::DataType::S32>(1) = weights->dim(1).value();
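+ // -1 lets Reshape infer the flattened batch dimension from the input size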
new_shape->shape_status(luci::ShapeStatus::VALID);
+ new_shape->name(name + "/Reshape/shape");
auto reshape = g->nodes()->create<luci::CircleReshape>();
+ luci::add_origin(reshape, luci::get_origin(fully_connected));
reshape->tensor(original_input);
reshape->shape(new_shape);
+ reshape->name(name + "/Reshape");
bcq_input = reshape;
}
perm->at<loco::DataType::S32>(0) = 1;
perm->at<loco::DataType::S32>(1) = 0;
perm->shape_status(luci::ShapeStatus::VALID);
+ perm->name(name + "/Transpose/perm");
auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(input_transpose, luci::get_origin(fully_connected));
input_transpose->a(bcq_input);
input_transpose->perm(perm);
+ input_transpose->name(name + "_input/Transpose");
bcq_fc->input(input_transpose);
auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(output_transpose, luci::get_origin(fully_connected));
output_transpose->a(bcq_fc);
output_transpose->perm(perm);
+ output_transpose->name(name + "_output/Transpose");
loco::replace(fully_connected).with(output_transpose);
return true;
}
else if (auto weights_as_input =
- dynamic_cast<luci::CircleConst *>(fully_connected->input()))
+ dynamic_cast<luci::CircleConst *>(fully_connected->input()))
{
auto prefix = get_prefix_of_const(weights_as_input);
if (prefix == -1 || !is_valid_prefix(prefix))
assert(_do_w_x[prefix]->at<loco::DataType::BOOL>(0) == true);
+ auto name = weights_as_input->name();
+ assert(name.length() > 0);
+
auto perm = g->nodes()->create<luci::CircleConst>();
perm->dtype(loco::DataType::S32);
perm->size<loco::DataType::S32>(2);
perm->at<loco::DataType::S32>(0) = 1;
perm->at<loco::DataType::S32>(1) = 0;
perm->shape_status(luci::ShapeStatus::VALID);
+ perm->name(name + "/Transpose/perm");
auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(input_transpose, luci::get_origin(fully_connected));
input_transpose->a(fully_connected->weights());
input_transpose->perm(perm);
+ input_transpose->name(name + "/Transpose");
auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ luci::add_origin(bcq_fc, luci::get_origin(fully_connected));
assert(dynamic_cast<luci::CircleOutputExclude *>(fully_connected->bias()) != nullptr);
bcq_fc->weights_hidden_size(weights_as_input->dim(1).value());
bcq_fc->input(input_transpose);
+ bcq_fc->name(name + "/BCQFullyConnected");
+
loco::replace(fully_connected).with(bcq_fc);
return true;
new_beta->dim(1) = _packed_binary_code[prefix]->dim(1);
for (uint32_t i = 0; i < _packed_binary_code[prefix]->size<loco::DataType::S32>(); ++i)
new_beta->at<loco::DataType::S32>(i) =
- _packed_binary_code[prefix]->at<loco::DataType::S32>(i);
+ _packed_binary_code[prefix]->at<loco::DataType::S32>(i);
new_beta->shape_status(luci::ShapeStatus::VALID);
return new_beta;
for (int i = 0; i < number_of_clusters; ++i)
{
packed_clusters->at<loco::DataType::S32>(i * 2) =
- qbits_of_clusters->at<loco::DataType::S32>(i);
+ qbits_of_clusters->at<loco::DataType::S32>(i);
packed_clusters->at<loco::DataType::S32>(i * 2 + 1) =
- size_of_clusters->at<loco::DataType::S32>(i);
+ size_of_clusters->at<loco::DataType::S32>(i);
}
return packed_clusters;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBCQPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBCQPassTest, name)
+{
+ luci::FuseBCQPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithConvPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Mul-Add to Conv2D if possible.
+ *
+ * NOTE TF's BatchNormalization is converted to Mul and Add.
+ *
+ * BEFORE
+ * | [CircleConst]
+ * | / [CircleConst]
+ * | / /
+ * [CircleConv2D] [CircleConst]
+ * | /
+ * [CircleMul] [CircleConst]
+ * | /
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * | [CircleConst]
+ * +--------------+ / [CircleConst]
+ * | | / /
+ * | [CircleConv2D] [CircleConst]
+ * [CircleConst] | | /
+ * [CircleConst] \ | [CircleMul] [CircleConst]
+ * \ \ | | /
+ * [CircleConv2D] [CircleAdd]
+ * |
+ */
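+// In short, the BatchNorm constants are folded into the convolution weights:
+//   fused_filter[o, h, w, i] = filter[o, h, w, i] * scale[o]
+//   fused_bias[o]            = bias[o] * scale[o] + shift[o]
+// where scale is the Mul constant and shift is the Add constant (see the loops below).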
+bool fused_batch_norm_with_conv(luci::CircleAdd *add)
+{
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *shift = nullptr;
+ if (auto add_lhs = dynamic_cast<luci::CircleMul *>(add->x()))
+ {
+ mul = add_lhs;
+ shift = dynamic_cast<luci::CircleConst *>(add->y());
+ }
+ else if (auto add_rhs = dynamic_cast<luci::CircleMul *>(add->y()))
+ {
+ mul = add_rhs;
+ shift = dynamic_cast<luci::CircleConst *>(add->x());
+ }
+
+ // If CircleMul is not found or constant operand of CircleAdd is not found,
+ // this pass cannot be applied.
+ if (mul == nullptr || shift == nullptr)
+ return false;
+
+ // If FusedActivationFunction of mul is not none, this pass cannot be applied.
+ if (mul->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+
+ // To apply this pass, shape of shift should be [1, 1, 1, out_channel].
+ if (shift->rank() != 4)
+ return false;
+ for (uint32_t i = 0; i < 3; ++i)
+ if (shift->dim(i).value() != 1)
+ return false;
+
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleConst *scale = nullptr;
+ if (auto mul_lhs = dynamic_cast<luci::CircleConv2D *>(mul->x()))
+ {
+ conv = mul_lhs;
+ scale = dynamic_cast<luci::CircleConst *>(mul->y());
+ }
+ else if (auto mul_rhs = dynamic_cast<luci::CircleConv2D *>(mul->y()))
+ {
+ conv = mul_rhs;
+ scale = dynamic_cast<luci::CircleConst *>(mul->x());
+ }
+
+ // If CircleConv2D is not found or constant operand of CircleMul is not found,
+ // this pass cannot be applied.
+ if (conv == nullptr || scale == nullptr)
+ return false;
+
+ // To apply this pass, shape of scale should be [1, 1, 1, out_channel].
+ if (scale->rank() != 4)
+ return false;
+ for (uint32_t i = 0; i < 3; ++i)
+ if (scale->dim(i).value() != 1)
+ return false;
+
+ // If FusedActivationFunction of conv is not none, this pass cannot be applied.
+ if (conv->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+
+ luci::CircleConst *filter = dynamic_cast<luci::CircleConst *>(conv->filter());
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(conv->bias());
+
+ // If filter or bias of conv is not const, this pass cannot be applied.
+ if (filter == nullptr || bias == nullptr)
+ return false;
+
+ // If dtype of filter is different with scale and shift, multiplication may be impossible.
+ if (filter->dtype() != scale->dtype())
+ return false;
+ if (filter->dtype() != shift->dtype())
+ return false;
+
+ // TODO Support more data type
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // Output channel dimension should be same. If not, this pass cannot be applied.
+ if (filter->dim(0).value() != scale->dim(3).value())
+ return false;
+ if (filter->dim(0).value() != shift->dim(3).value())
+ return false;
+
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ luci::CircleConv2D *fused_conv = add->graph()->nodes()->create<luci::CircleConv2D>();
+ luci::CircleConst *fused_filter = add->graph()->nodes()->create<luci::CircleConst>();
+ luci::CircleConst *fused_bias = add->graph()->nodes()->create<luci::CircleConst>();
+
+ uint32_t filter_out_channel = filter->dim(0).value();
+ uint32_t filter_height = filter->dim(1).value();
+ uint32_t filter_width = filter->dim(2).value();
+ uint32_t filter_in_channel = filter->dim(3).value();
+
+ // Copy filter
+ fused_filter->dtype(filter->dtype());
+ fused_filter->size<loco::DataType::FLOAT32>(filter->size<loco::DataType::FLOAT32>());
+ fused_filter->rank(4);
+ fused_filter->dim(0).set(filter_out_channel);
+ fused_filter->dim(1).set(filter_height);
+ fused_filter->dim(2).set(filter_width);
+ fused_filter->dim(3).set(filter_in_channel);
+ fused_filter->shape_status(luci::ShapeStatus::VALID);
+ fused_filter->name(name + "/Conv2D/filter");
+
+ // Fuse scale to new filter
+ for (uint32_t c = 0; c < filter_out_channel; c++)
+ {
+ for (uint32_t h = 0; h < filter_height; h++)
+ {
+ for (uint32_t w = 0; w < filter_width; w++)
+ {
+ for (uint32_t b = 0; b < filter_in_channel; b++)
+ {
+ uint32_t offset = c * filter_height * filter_width * filter_in_channel +
+ h * filter_width * filter_in_channel + w * filter_in_channel + b;
+ fused_filter->at<loco::DataType::FLOAT32>(offset) =
+ filter->at<loco::DataType::FLOAT32>(offset) * scale->at<loco::DataType::FLOAT32>(c);
+ }
+ }
+ }
+ }
+
+ // Copy bias
+ assert(bias->rank() == 1);
+ assert(bias->dim(0).value() == filter_out_channel);
+ fused_bias->dtype(bias->dtype());
+ fused_bias->size<loco::DataType::FLOAT32>(bias->size<loco::DataType::FLOAT32>());
+ fused_bias->rank(1);
+ fused_bias->dim(0).set(filter_out_channel);
+ fused_bias->shape_status(luci::ShapeStatus::VALID);
+ fused_bias->name(name + "/Conv2D/bias");
+
+ // Fuse scale and shift to bias
+ for (uint32_t b = 0; b < filter_out_channel; ++b)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(b) =
+ bias->at<loco::DataType::FLOAT32>(b) * scale->at<loco::DataType::FLOAT32>(b) +
+ shift->at<loco::DataType::FLOAT32>(b);
+ }
+
+ // Set attributes of fused_conv
+ fused_conv->input(conv->input());
+ fused_conv->filter(fused_filter);
+ fused_conv->bias(fused_bias);
+ fused_conv->fusedActivationFunction(add->fusedActivationFunction());
+ fused_conv->padding(conv->padding());
+ fused_conv->stride()->h(conv->stride()->h());
+ fused_conv->stride()->w(conv->stride()->w());
+ fused_conv->dilation()->h(conv->dilation()->h());
+ fused_conv->dilation()->w(conv->dilation()->w());
+ fused_conv->name(name + "/Conv2D");
+ luci::add_origin(fused_conv, luci::composite_origin({luci::get_origin(add), luci::get_origin(mul),
+ luci::get_origin(conv)}));
+
+ replace(add).with(fused_conv);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+ {
+ if (fused_batch_norm_with_conv(add))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBatchNormWithConvPassTest, name)
+{
+ luci::FuseBatchNormWithConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithDwConvPass.h"
+
+#include "helpers/NodeFiller.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Mul-Add to DepthwiseConv2D if possible.
+ *
+ * NOTE TF's BatchNormalization is converted to Mul and Add.
+ *
+ * BEFORE
+ * | [CircleConst]
+ * | / [CircleConst]
+ * | / /
+ * [CircleDepthwiseConv2D] [CircleConst]
+ * | /
+ * [CircleMul] [CircleConst]
+ * | /
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * | [CircleConst]
+ * +-------------------------------------+ / [CircleConst]
+ * | | / /
+ * | [CircleDepthwiseConv2D] [CircleConst]
+ * | [CircleConst] | /
+ * | / [CircleConst] [CircleMul] [CircleConst]
+ * | / / | /
+ * [CircleDepthwiseConv2D] [CircleAdd]
+ * |
+ *
+ */
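+// In short, the BatchNorm constants are folded into the depthwise convolution:
+//   fused_filter[0, h, w, c] = filter[0, h, w, c] * scale[c / multiplier]
+//   fused_bias[c]            = bias[c] * scale[c] + shift[c]
+// where scale is the Mul constant and shift is the Add constant (see the loops below).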
+
+/**
+ * @brief Check shape is [x] or [1, 1, 1, x]
+ */
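+// e.g. shapes [32] and [1, 1, 1, 32] are accepted while [1, 32, 1, 1] is rejected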
+bool is_scale_shift_shape(luci::CircleConst *node)
+{
+ auto rank = node->rank();
+ if (rank != 1 && rank != 4)
+ return false;
+ for (uint32_t r = 0; r < rank - 1; ++r)
+ {
+ if (node->dim(r).value() != 1)
+ return false;
+ }
+ return true;
+}
+
+bool fused_batch_norm_with_dwconv(luci::CircleAdd *add)
+{
+ assert(add != nullptr);
+
+ // Find the pattern of CircleDepthwiseConv2D - CircleMul - CircleAdd
+ luci::CircleConst *scale = nullptr;
+ luci::CircleConst *shift = nullptr;
+ luci::CircleDepthwiseConv2D *dwconv = nullptr;
+ luci::CircleMul *mul = nullptr;
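+ // luci::fill(&a, &b).with_commutative_args_of(n) binds a/b to n's two operands
+ // in either order and returns false when the operand types do not match.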
+ if (not luci::fill(&shift, &mul).with_commutative_args_of(add))
+ return false;
+ if (not luci::fill(&scale, &dwconv).with_commutative_args_of(mul))
+ return false;
+
+ // check scale and shift constant attributes
+ // scale and shift can be [x] or [1, 1, 1, x]
+ if (not is_scale_shift_shape(scale))
+ return false;
+ if (not is_scale_shift_shape(shift))
+ return false;
+
+ // check mul, add attributes
+ if (mul->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (mul->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+ // TODO support more Activations
+ if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+ return false;
+
+ // get weight of dwconv
+ auto filter = dynamic_cast<luci::CircleConst *>(dwconv->filter());
+ if (not filter)
+ return false;
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (filter->rank() != 4)
+ return false;
+
+ // check attributes of dwconv
+ if (dwconv->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+ if (dwconv->depthMultiplier() < 0) // can this happen?
+ return false;
+
+ // get bias of dwconv
+ auto bias = dynamic_cast<luci::CircleConst *>(dwconv->bias());
+ if (not bias)
+ return false;
+ if (bias->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (bias->rank() != 1)
+ return false;
+
+ // filter is represented as [1, H, W, C*M] where M is the depth multiplier.
+ auto filter_out_chn = filter->dim(3).value();
+ auto multiplier = static_cast<uint32_t>(dwconv->depthMultiplier());
+ auto srank = scale->rank(); // as rank can be 1 or 4
+ if (filter_out_chn != scale->dim(srank - 1).value() * multiplier)
+ return false;
+ srank = shift->rank();
+ if (filter_out_chn != shift->dim(srank - 1).value() * multiplier)
+ return false;
+ auto channel = filter_out_chn / multiplier;
+
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ loco::Graph *graph = add->graph();
+ luci::CircleDepthwiseConv2D *fused_dwconv = graph->nodes()->create<luci::CircleDepthwiseConv2D>();
+ luci::CircleConst *fused_filter = graph->nodes()->create<luci::CircleConst>();
+ luci::CircleConst *fused_bias = graph->nodes()->create<luci::CircleConst>();
+
+ auto filter_in_chn = filter->dim(0).value();
+ auto filter_height = filter->dim(1).value();
+ auto filter_width = filter->dim(2).value();
+ assert(filter_in_chn == 1);
+
+ // Copy filter shape
+ fused_filter->dtype(filter->dtype());
+ fused_filter->size<loco::DataType::FLOAT32>(filter->size<loco::DataType::FLOAT32>());
+ fused_filter->rank(4);
+ fused_filter->dim(0).set(filter_in_chn);
+ fused_filter->dim(1).set(filter_height);
+ fused_filter->dim(2).set(filter_width);
+ fused_filter->dim(3).set(filter_out_chn);
+ fused_filter->shape_status(luci::ShapeStatus::VALID);
+ fused_filter->name(name + "/DepthwiseConv2D/filter");
+
+ // fused filter weight = filter weight * mul(scale); add(shift) is folded into the bias below
+ for (uint32_t b = 0; b < filter_in_chn; b++)
+ {
+ for (uint32_t h = 0; h < filter_height; h++)
+ {
+ for (uint32_t w = 0; w < filter_width; w++)
+ {
+ for (uint32_t c = 0; c < filter_out_chn; c++)
+ {
+ uint32_t offset = b * filter_height * filter_width * filter_out_chn +
+ h * filter_width * filter_out_chn + w * filter_out_chn + c;
+ uint32_t chn = c / multiplier;
+ fused_filter->at<loco::DataType::FLOAT32>(offset) =
+ filter->at<loco::DataType::FLOAT32>(offset) * scale->at<loco::DataType::FLOAT32>(chn);
+ }
+ }
+ }
+ }
+
+ // Fuse bias with scale and shift
+ fused_bias->dtype(shift->dtype());
+ fused_bias->size<loco::DataType::FLOAT32>(shift->size<loco::DataType::FLOAT32>());
+ fused_bias->rank(1);
+ fused_bias->dim(0).set(channel);
+ fused_bias->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t c = 0; c < channel; ++c)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(c) =
+ bias->at<loco::DataType::FLOAT32>(c) * scale->at<loco::DataType::FLOAT32>(c) +
+ shift->at<loco::DataType::FLOAT32>(c);
+ }
+ fused_bias->name(name + "/DepthwiseConv2D/bias");
+
+ // set new dwconv properties
+ fused_dwconv->input(dwconv->input());
+ fused_dwconv->filter(fused_filter);
+ fused_dwconv->bias(fused_bias);
+ fused_dwconv->fusedActivationFunction(add->fusedActivationFunction());
+ fused_dwconv->padding(dwconv->padding());
+ fused_dwconv->stride()->h(dwconv->stride()->h());
+ fused_dwconv->stride()->w(dwconv->stride()->w());
+ fused_dwconv->depthMultiplier(dwconv->depthMultiplier());
+ fused_dwconv->dilation()->h(dwconv->dilation()->h());
+ fused_dwconv->dilation()->w(dwconv->dilation()->w());
+ fused_dwconv->name(name + "/DepthwiseConv2D");
+ luci::add_origin(fused_dwconv,
+ luci::composite_origin(
+ {luci::get_origin(add), luci::get_origin(mul), luci::get_origin(dwconv)}));
+
+ replace(add).with(fused_dwconv);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithDwConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+ {
+ if (fused_batch_norm_with_dwconv(add))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithDwConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBatchNormWithDwConvPassTest, name)
+{
+ luci::FuseBatchNormWithDwConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/FuseBatchNormWithTConv.h"
-
-#include <luci/IR/CircleNodes.h>
-
-namespace
-{
-/**
- * NOTE TF's fusedBatchNorm is converted to mul and add of Circle.
- *
- * BEFORE
- *
- * [CircleTransposeConv]
- * |
- * [mul]
- * |
- * [add]
- * AFTER
- *
- * [CircleTransposeConv]
- */
-bool fused_batch_norm_with_tconv(luci::CircleTransposeConv *tconv)
-{
- // check whether it has bias or not. This optimization works only if it doesn't.
- auto bias = dynamic_cast<luci::CircleOutputExclude *>(tconv->bias());
- if (not bias)
- return false;
-
- // get weight of tconv
- auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
- if (not filter)
- return false;
- if (filter->dtype() != loco::DataType::FLOAT32)
- return false;
-
- // get mul node
- auto tconv_output = loco::succs(tconv);
- assert(tconv_output.size() == 1);
- auto mul = dynamic_cast<luci::CircleMul *>(*tconv_output.begin());
- if (not mul)
- return false;
- if (mul->dtype() != loco::DataType::FLOAT32)
- return false;
-
- // get add node
- auto mul_output = loco::succs(mul);
- assert(mul_output.size() == 1);
- auto add = dynamic_cast<luci::CircleAdd *>(*mul_output.begin());
- if (not add)
- return false;
- if (add->dtype() != loco::DataType::FLOAT32)
- return false;
- if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
- add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
- return false;
-
- // get scale of batchnorm
- auto scale = dynamic_cast<luci::CircleConst *>(mul->y());
- if (not scale)
- return false;
-
- // scale dim(0) == tconv filter channel dim
- if (filter->rank() != 4)
- return false;
- auto filter_out_dim = filter->dim(0).value();
- if (scale->rank() != 1)
- return false;
- auto scale_dim = scale->dim(0).value();
- if (filter_out_dim != scale_dim)
- return false;
-
- // get shift of batchnorm
- auto shift = dynamic_cast<luci::CircleConst *>(add->y());
- if (not shift)
- return false;
-
- // shift dim(0) == tconv filter channel dim
- if (shift->rank() != 1)
- return false;
- auto shift_dim = shift->dim(0).value();
- if (filter_out_dim != shift_dim)
- return false;
-
- // filter weight = filter weight * mul(scale) + add(shift)
- uint32_t filter_height_dim = filter->dim(1).value();
- uint32_t filter_width_dim = filter->dim(2).value();
- uint32_t filter_in_dim = filter->dim(3).value();
- for (uint32_t c = 0; c < filter_out_dim; c++)
- {
- for (uint32_t h = 0; h < filter_height_dim; h++)
- {
- for (uint32_t w = 0; w < filter_width_dim; w++)
- {
- for (uint32_t b = 0; b < filter_in_dim; b++)
- {
- uint32_t offset = c * filter_height_dim * filter_width_dim * filter_in_dim +
- h * filter_width_dim * filter_in_dim + w * filter_in_dim + b;
- filter->at<loco::DataType::FLOAT32>(offset) *= scale->at<loco::DataType::FLOAT32>(c);
- }
- }
- }
- }
-
- // fuse shift with transposed conv
- tconv->bias(shift);
-
- if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
- {
- // separate relu op from add op
- auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
- relu->features(tconv);
-
- // remove mul node
- replace(add).with(relu);
- }
- else
- {
- replace(add).with(tconv);
- }
-
- return true;
-}
-
-} // namespace
-
-namespace luci
-{
-
-bool FuseBatchNormWithTConvPass::run(loco::Graph *g)
-{
- bool changed = false;
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- auto tconv = dynamic_cast<luci::CircleTransposeConv *>(node);
- if (not tconv)
- continue;
-
- changed |= fused_batch_norm_with_tconv(tconv);
- }
-
- return changed;
-}
-
-} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithTConvPass.h"
+
+#include "helpers/NodeFiller.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Mul-Add to TransposeConv if possible.
+ *
+ * NOTE TF's BatchNormalization is converted to Mul and Add.
+ *
+ * BEFORE
+ * | [CircleOutputExclude]
+ * | / [CircleConst]
+ * | / /
+ * [CircleTransposeConv] [CircleConst]
+ * | /
+ * [CircleMul] [CircleConst]
+ * | /
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * | [CircleOutputExclude]
+ * +-------------------------------------+ / [CircleConst]
+ * | | / /
+ * | [CircleTransposeConv] [CircleConst]
+ * | [CircleConst] | /
+ * | / [CircleConst] [CircleMul] [CircleConst]
+ * | / / | /
+ * [CircleTransposeConv] [CircleAdd]
+ * |
+ * ([CircleRelu6])
+ * |
+ *
+ * Note: CircleRelu6 is inserted if Add activation is ReLU6
+ */
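+// In short, the BatchNorm constants are folded into the TransposeConv:
+//   fused_filter[o, h, w, i] = filter[o, h, w, i] * scale[o]
+//   fused_bias[o]            = shift[o]   (the original tconv has no bias)
+// where scale is the Mul constant and shift is the Add constant (see the loops below).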
+bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
+{
+ assert(add != nullptr);
+
+ // Find the pattern of CircleTransposeConv - CircleMul - CircleAdd
+ luci::CircleConst *scale = nullptr;
+ luci::CircleConst *shift = nullptr;
+ luci::CircleTransposeConv *tconv = nullptr;
+ luci::CircleMul *mul = nullptr;
+ if (not luci::fill(&shift, &mul).with_commutative_args_of(add))
+ return false;
+ if (not luci::fill(&scale, &tconv).with_commutative_args_of(mul))
+ return false;
+
+ // check scale and shift constant attributes
+ if (scale->rank() != 1)
+ return false;
+ if (shift->rank() != 1)
+ return false;
+ // check mul, add attributes
+ if (mul->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+ return false;
+
+ // tconv bias should not be set
+ if (not dynamic_cast<luci::CircleOutputExclude *>(tconv->bias()))
+ return false;
+
+ // get weight of tconv
+ auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
+ if (not filter)
+ return false;
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (filter->rank() != 4)
+ return false;
+
+ auto filter_out_chn = filter->dim(0).value();
+ if (filter_out_chn != scale->dim(0).value())
+ return false;
+ if (filter_out_chn != shift->dim(0).value())
+ return false;
+
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ loco::Graph *graph = add->graph();
+ luci::CircleTransposeConv *fused_tconv = graph->nodes()->create<luci::CircleTransposeConv>();
+ luci::CircleConst *fused_filter = graph->nodes()->create<luci::CircleConst>();
+ luci::CircleConst *fused_bias = graph->nodes()->create<luci::CircleConst>();
+
+ auto filter_height = filter->dim(1).value();
+ auto filter_width = filter->dim(2).value();
+ auto filter_in_chn = filter->dim(3).value();
+
+ // Copy filter shape
+ fused_filter->dtype(filter->dtype());
+ fused_filter->size<loco::DataType::FLOAT32>(filter->size<loco::DataType::FLOAT32>());
+ fused_filter->rank(4);
+ fused_filter->dim(0).set(filter_out_chn);
+ fused_filter->dim(1).set(filter_height);
+ fused_filter->dim(2).set(filter_width);
+ fused_filter->dim(3).set(filter_in_chn);
+ fused_filter->shape_status(luci::ShapeStatus::VALID);
+ fused_filter->name(name + "/TransposeConv/filter");
+
+ // fused filter weight = filter weight * mul(scale); add(shift) becomes the new bias below
+ for (uint32_t c = 0; c < filter_out_chn; c++)
+ {
+ for (uint32_t h = 0; h < filter_height; h++)
+ {
+ for (uint32_t w = 0; w < filter_width; w++)
+ {
+ for (uint32_t b = 0; b < filter_in_chn; b++)
+ {
+ uint32_t offset = c * filter_height * filter_width * filter_in_chn +
+ h * filter_width * filter_in_chn + w * filter_in_chn + b;
+ fused_filter->at<loco::DataType::FLOAT32>(offset) =
+ filter->at<loco::DataType::FLOAT32>(offset) * scale->at<loco::DataType::FLOAT32>(c);
+ }
+ }
+ }
+ }
+
+ // Copy fused_bias from shift
+ fused_bias->dtype(shift->dtype());
+ fused_bias->size<loco::DataType::FLOAT32>(shift->size<loco::DataType::FLOAT32>());
+ fused_bias->rank(1);
+ fused_bias->dim(0).set(filter_out_chn);
+ fused_bias->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t c = 0; c < filter_out_chn; ++c)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(c) = shift->at<loco::DataType::FLOAT32>(c);
+ }
+ fused_bias->name(name + "/TransposeConv/bias");
+
+ // set new tconv properties
+ fused_tconv->inputSizes(tconv->inputSizes());
+ fused_tconv->filter(fused_filter);
+ fused_tconv->outBackprop(tconv->outBackprop());
+ fused_tconv->bias(fused_bias);
+ fused_tconv->padding(tconv->padding());
+ fused_tconv->stride()->h(tconv->stride()->h());
+ fused_tconv->stride()->w(tconv->stride()->w());
+ fused_tconv->name(name + "/TransposeConv");
+ luci::add_origin(fused_tconv,
+ luci::composite_origin(
+ {luci::get_origin(add), luci::get_origin(mul), luci::get_origin(tconv)}));
+
+ if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
+ {
+ // separate relu op from add op
+ auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
+ relu->features(fused_tconv);
+ relu->name(name + "/Relu6");
+ luci::add_origin(relu, luci::get_origin(add));
+
+ replace(add).with(relu);
+ }
+ else
+ {
+ replace(add).with(fused_tconv);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithTConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+ {
+ if (fused_batch_norm_with_tconv(add))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithTConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBatchNormWithTConvPassTest, name)
+{
+ luci::FuseBatchNormWithTConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
*/
#include "luci/Pass/FuseInstanceNormPass.h"
+#include "helpers/NodeFiller.h"
#include "FuseInstanceNormPassInternal.h"
#include <luci/IR/CircleNodes.h>
-#include <loco/Service/ShapeInference.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <cassert>
#include <set>
-// Helper to find commutative node's arguments
-namespace
-{
-
-/**
- * INTRODUCTION
- * Binary operation f(x,y) is 'commutative' when
- * f(x,y) == f(y,x) holds for all x, y.
- * For examples, ADD, MUL and SQUARED_DIFFERENCE are commutative.
- * These helpers make it easy to find commutative arguemnts of commtative node.
- *
- * HOW TO USE
- * COMM_NODE *node;
- * ARG_TYPE_1 *arg1;
- * ARG_TYPE_2 *arg2;
- *
- * bool ok = fill(&arg1, &arg2).with_commutative_args_of(node);
- *
- * Result
- * If 'node's commutative argument types are actually {ARG_TYPE_1, ARG_TYPE_2}
- * (as a set), 'arg1' and 'arg2' set as actual 'node's arguemnts with matching
- * type, and return value 'ok' is true.
- * Otherwise, 'arg1' and 'arg2' not changed, 'ok' is false.
- */
-
-template <class ARG_TYPE_1, class ARG_TYPE_2> class NodeFiller final
-{
-public:
- NodeFiller(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2) : _arg_1(arg_1), _arg_2(arg_2)
- {
- // DO NOTHING
- }
-
- /**
- * @return true When 'node's argument types are 'ARG_TYPE_1' and 'ARG_TYPE_2'
- * In such case, it assign '_arg_1' and '_arg_2' to actual arguments
- *
- * @return false When 'node's argument types are NOT matched with 'ARG_TYPE_*'
- * In such case, it does not amend '_arg_1' and '_arg_2'
- *
- * @require COMM_NODE has member x() and y()
- */
- template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);
-
-private:
- ARG_TYPE_1 **_arg_1;
- ARG_TYPE_2 **_arg_2;
-};
-
-template <class ARG_TYPE_1, class ARG_TYPE_2>
-inline NodeFiller<ARG_TYPE_1, ARG_TYPE_2> fill(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2)
-{
- return NodeFiller<ARG_TYPE_1, ARG_TYPE_2>{arg_1, arg_2};
-}
-
-template <class ARG_TYPE_1, class ARG_TYPE_2>
-template <class COMM_NODE>
-bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NODE *node)
-{
- // Case 1) X == ARG_TYPE_1 / Y == ARG_TYPE_2
- {
- auto x = dynamic_cast<ARG_TYPE_1 *>(node->x());
- auto y = dynamic_cast<ARG_TYPE_2 *>(node->y());
-
- if (x && y)
- {
- *_arg_1 = x;
- *_arg_2 = y;
- return true;
- }
- }
-
- // Case 2) X == ARG_TYPE_2 / Y == ARG_TYPE_1
- {
- auto x = dynamic_cast<ARG_TYPE_2 *>(node->x());
- auto y = dynamic_cast<ARG_TYPE_1 *>(node->y());
-
- if (x && y)
- {
- *_arg_1 = y;
- *_arg_2 = x;
- return true;
- }
- }
-
- return false;
-}
-
-} // namespace
-
// Helper to check detail
/// @return true When node has shape of '1 x .. x 1 x depth'
//
// CHECK 1) input is rank 4
//
- auto input = mean->input();
- if (not loco::shape_known(input))
+ auto input = loco::must_cast<luci::CircleNode *>(mean->input());
+ if (input->shape_status() != luci::ShapeStatus::VALID)
return false;
- auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
- if (input_shape.rank() != 4)
+ if (input->rank() != 4)
return false;
//
//
// CHECK 1) input is rank 5 (NHWCX)
//
- auto input = mean->input();
- if (not loco::shape_known(input))
+ auto input = loco::must_cast<luci::CircleNode *>(mean->input());
+ if (input->shape_status() != luci::ShapeStatus::VALID)
return false;
- auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
- if (input_shape.rank() != 5)
+ if (input->rank() != 5)
return false;
//
// So it is handled in the separate if statement
if (_pv == PatternVersion::Version_2)
{
- CHECK_OR_FALSE(fill(&mul_gamma, &const_as_beta).with_commutative_args_of(add_as_terminal));
- CHECK_OR_FALSE(fill(&div, &const_as_gamma).with_commutative_args_of(mul_gamma));
+ CHECK_OR_FALSE(
+ luci::fill(&mul_gamma, &const_as_beta).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(luci::fill(&div, &const_as_gamma).with_commutative_args_of(mul_gamma));
sub = dynamic_cast<luci::CircleSub *>(div->x());
CHECK_OR_FALSE(sub);
luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm);
CHECK_OR_FALSE(ifm_node->rank() == 4);
+ CHECK_OR_FALSE(ifm_node->dim(3).known());
uint32_t ifm_channel_depth = ifm_node->dim(3).value();
mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y());
CHECK_OR_FALSE(zero_point_five->at<loco::DataType::FLOAT32>(0) == 0.5);
CHECK_OR_FALSE(
- fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ luci::fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
// TODO Support regarding broadcast
CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
loco::Node *ifm_should_be = nullptr;
luci::CircleMean *mean_of_ifm_should_be = nullptr;
- CHECK_OR_FALSE(fill(&ifm_should_be, &mean_of_ifm_should_be).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(
+ luci::fill(&ifm_should_be, &mean_of_ifm_should_be).with_commutative_args_of(sqdiff));
CHECK_OR_FALSE(ifm == ifm_should_be);
CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
if (_pv == PatternVersion::Version_0)
{
- CHECK_OR_FALSE(fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
- CHECK_OR_FALSE(fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
+ CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
}
if (_pv == PatternVersion::Version_1)
{
- CHECK_OR_FALSE(fill(&mul_as_scaled_reshape, &sub).with_commutative_args_of(add_as_terminal));
CHECK_OR_FALSE(
- fill(&reshape_of_ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_reshape));
+ luci::fill(&mul_as_scaled_reshape, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(
+ luci::fill(&reshape_of_ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_reshape));
ifm = reshape_of_ifm->tensor();
}
- CHECK_OR_FALSE(loco::shape_known(ifm));
- auto ifm_shape = loco::shape_get(ifm);
- CHECK_OR_FALSE(ifm_shape.domain() == loco::Domain::Tensor);
- auto ifm_tensor_shape = ifm_shape.as<loco::TensorShape>();
- CHECK_OR_FALSE(ifm_tensor_shape.rank() == 4);
- uint32_t ifm_channel_depth = ifm_tensor_shape.dim(3).value();
+ auto ifm_circle = loco::must_cast<luci::CircleNode *>(ifm);
+ CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID);
+ CHECK_OR_FALSE(ifm_circle->rank() == 4);
+ CHECK_OR_FALSE(ifm_circle->dim(3).known());
+ uint32_t ifm_channel_depth = ifm_circle->dim(3).value();
- CHECK_OR_FALSE(fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
+ CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
if (_pv == PatternVersion::Version_0)
{
CHECK_OR_FALSE(add_as_variance);
CHECK_OR_FALSE(
- fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ luci::fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
// TODO Support regarding broadcast
if (_pv == PatternVersion::Version_0)
{
loco::Node *ifm_should_be = nullptr;
- CHECK_OR_FALSE(fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(luci::fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
CHECK_OR_FALSE(ifm == ifm_should_be);
CHECK_OR_FALSE(is_instance_mean_v0(mean_of_ifm));
CHECK_OR_FALSE(ifm == mean_of_ifm->input());
if (_pv == PatternVersion::Version_1)
{
loco::Node *reshape_should_be = nullptr;
- CHECK_OR_FALSE(fill(&reshape_should_be, &mean_of_reshape).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(
+ luci::fill(&reshape_should_be, &mean_of_reshape).with_commutative_args_of(sqdiff));
CHECK_OR_FALSE(reshape_of_ifm == reshape_should_be);
CHECK_OR_FALSE(is_instance_mean_v1(mean_of_reshape));
CHECK_OR_FALSE(reshape_of_ifm == mean_of_reshape->input());
if (_pv == PatternVersion::Version_0)
{
- CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
- .with_commutative_args_of(mul_as_scaled_mean));
+ CHECK_OR_FALSE(luci::fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
+ .with_commutative_args_of(mul_as_scaled_mean));
CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
}
if (_pv == PatternVersion::Version_1)
{
- CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_reshape_should_be)
- .with_commutative_args_of(mul_as_scaled_mean));
+ CHECK_OR_FALSE(luci::fill(&mul_gamma_should_be, &mean_of_reshape_should_be)
+ .with_commutative_args_of(mul_as_scaled_mean));
CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
CHECK_OR_FALSE(mean_of_reshape == mean_of_reshape_should_be);
}
auto graph = p.add_as_terminal->graph();
- // Special case for version 2 (no need to reshape)
- if (p.version() == InstanceNormPattern::Version_2)
+ // Version 0 and 1 need to reshape
+ if (p.version() != InstanceNormPattern::Version_2)
{
- // Make Instance Norm to replace
- auto instance_norm = graph->nodes()->create<luci::CircleInstanceNorm>();
- instance_norm->input(p.ifm);
- instance_norm->gamma(p.const_as_gamma);
- instance_norm->beta(p.const_as_beta);
- float epsilon = p.const_as_epsilon->at<loco::DataType::FLOAT32>(0);
- instance_norm->epsilon(epsilon);
- instance_norm->fusedActivationFunction(p.add_as_terminal->fusedActivationFunction());
-
- replace(p.add_as_terminal).with(instance_norm);
-
- return;
- }
-
- // Make reshape for gamma & beta
- auto reshape_gamma = graph->nodes()->create<luci::CircleReshape>();
- auto reshape_beta = graph->nodes()->create<luci::CircleReshape>();
- {
- auto ifm_shape = loco::shape_get(p.ifm).as<loco::TensorShape>();
- uint32_t ifm_channel_depth = ifm_shape.dim(3).value();
-
- int32_t new_shape[1] = {static_cast<int32_t>(ifm_channel_depth)};
-
- reshape_gamma->tensor(p.const_as_gamma);
- reshape_beta->tensor(p.const_as_beta);
+ p.const_as_gamma->rank(1);
+ p.const_as_gamma->dim(0).set(p.const_as_gamma->size<loco::DataType::FLOAT32>());
+ p.const_as_beta->rank(1);
+ p.const_as_beta->dim(0).set(p.const_as_beta->size<loco::DataType::FLOAT32>());
- luci::set_new_shape(reshape_gamma, new_shape, 1);
- luci::set_new_shape(reshape_beta, new_shape, 1);
+ p.const_as_gamma->shape_status(luci::ShapeStatus::UNDEFINED);
+ p.const_as_beta->shape_status(luci::ShapeStatus::UNDEFINED);
}
// Make Instance Norm to replace
auto instance_norm = graph->nodes()->create<luci::CircleInstanceNorm>();
instance_norm->input(p.ifm);
- instance_norm->gamma(reshape_gamma);
- instance_norm->beta(reshape_beta);
+ instance_norm->gamma(p.const_as_gamma);
+ instance_norm->beta(p.const_as_beta);
float epsilon = p.const_as_epsilon->at<loco::DataType::FLOAT32>(0);
instance_norm->epsilon(epsilon);
instance_norm->fusedActivationFunction(p.add_as_terminal->fusedActivationFunction());
+ // NOTE unique name should be assigned in export
+ instance_norm->name("InstanceNorm");
+
+ // set origin
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origin_vec{
+ luci::get_origin(p.sqdiff),
+ luci::get_origin(p.mean_as_variance),
+ luci::get_origin(p.add_as_variance),
+ luci::get_origin(p.mul_gamma),
+ luci::get_origin(p.sub),
+ luci::get_origin(p.add_as_terminal)};
+ if (p.version() == InstanceNormPattern::PatternVersion::Version_0)
+ {
+ origin_vec.push_back(luci::get_origin(p.mean_of_ifm));
+ origin_vec.push_back(luci::get_origin(p.rsqrt));
+ origin_vec.push_back(luci::get_origin(p.mul_as_scaled_ifm));
+ origin_vec.push_back(luci::get_origin(p.mul_as_scaled_mean));
+ }
+ if (p.version() == InstanceNormPattern::PatternVersion::Version_1)
+ {
+ origin_vec.push_back(luci::get_origin(p.reshape_of_ifm));
+ origin_vec.push_back(luci::get_origin(p.mean_of_reshape));
+ origin_vec.push_back(luci::get_origin(p.rsqrt));
+ origin_vec.push_back(luci::get_origin(p.mul_as_scaled_mean));
+ origin_vec.push_back(luci::get_origin(p.mul_as_scaled_reshape));
+ }
+ if (p.version() == InstanceNormPattern::PatternVersion::Version_2)
+ {
+ origin_vec.push_back(luci::get_origin(p.mean_of_ifm));
+ origin_vec.push_back(luci::get_origin(p.pow));
+ origin_vec.push_back(luci::get_origin(p.div));
+ }
+ luci::add_origin(instance_norm, luci::composite_origin(origin_vec));
replace(p.add_as_terminal).with(instance_norm);
}
#include "FuseInstanceNormPassInternal.h"
+#include "luci/Pass/FuseInstanceNormPass.h"
+
#include <vector>
#include <gtest/gtest.h>
} // namespace
+TEST(FuseInstanceNormPassTest, name)
+{
+ luci::FuseInstanceNormPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(FuseInstanceNormPass, is_quasi_1D_with_dummy_dim)
{
luci::CircleConst const_node;
#include "luci/Pass/FusePreActivationBatchNormPass.h"
#include "FusePreActivationBatchNormPassInternal.h"
+#include "BatchNormPatternFinder.h"
#include <luci/IR/CircleNodes.h>
#include <luci/Log.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
return true;
}
-// Check if mul is batchnorm mul
-bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
- luci::CircleConst *&gamma)
-{
- auto x = dynamic_cast<luci::CircleConst *>(mul->x());
- auto y = dynamic_cast<luci::CircleConst *>(mul->y());
-
- luci::CircleNode *pred = nullptr;
- luci::CircleConst *constant = nullptr;
-
- if (x != nullptr && y == nullptr)
- {
- pred = loco::must_cast<luci::CircleNode *>(mul->y());
- constant = x;
- }
- else if (x == nullptr && y != nullptr)
- {
- pred = loco::must_cast<luci::CircleNode *>(mul->x());
- constant = y;
- }
- else
- {
- return false;
- }
-
- if (constant->rank() != 1)
- return false;
-
- auto channel_dim = constant->dim(0);
- if (!(channel_dim == mul->dim(mul->rank() - 1)))
- return false;
-
- pred_node = pred;
- gamma = constant;
- return true;
-}
-
-// Check if add is batchnorm add
-bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta)
-{
- auto x = loco::must_cast<luci::CircleNode *>(add->x());
- auto y = loco::must_cast<luci::CircleNode *>(add->y());
-
- luci::CircleMul *pred = nullptr;
- luci::CircleConst *constant = nullptr;
-
- if (add->fusedActivationFunction() != luci::FusedActFunc::RELU)
- return false;
-
- if (x->opcode() == luci::CircleOpcode::CIRCLECONST && y->opcode() == luci::CircleOpcode::MUL)
- {
- pred = loco::must_cast<luci::CircleMul *>(y);
- constant = loco::must_cast<luci::CircleConst *>(x);
- }
- else if (x->opcode() == luci::CircleOpcode::MUL && y->opcode() == luci::CircleOpcode::CIRCLECONST)
- {
- pred = loco::must_cast<luci::CircleMul *>(x);
- constant = loco::must_cast<luci::CircleConst *>(y);
- }
- else
- {
- return false;
- }
-
- if (constant->rank() != 1)
- return false;
-
- auto channel_dim = constant->dim(0);
- // Assumption: Layout is channel-last
- if (!(channel_dim == add->dim(add->rank() - 1)))
- return false;
-
- mul = pred;
- beta = constant;
- return true;
-}
-
const luci::CircleConv2D *get_forward_conv2d(const luci::CircleNode *node, uint32_t channel_size)
{
auto opcode = node->opcode();
auto size = beta->dim(0).value();
auto bias = dynamic_cast<luci::CircleConst *>(conv->bias());
+ auto name = conv->name();
+ assert(name.length() > 0);
+
if (bias == nullptr)
{
bias = conv->graph()->nodes()->create<luci::CircleConst>();
bias->rank(1);
bias->dim(0).set(size);
bias->size<loco::DataType::FLOAT32>(size);
+ bias->name(name + "/bias");
conv->bias(bias);
}
else
luci::CircleSub *insert_sub(luci::CircleNode *pred, luci::CircleConst *beta)
{
+ auto name = pred->name();
+ assert(name.length() > 0);
+
auto sub = pred->graph()->nodes()->create<luci::CircleSub>();
- sub->dtype(loco::DataType::FLOAT32);
- sub->rank(pred->rank());
- for (uint32_t i = 0; i < sub->rank(); i++)
- {
- sub->dim(i).set(pred->dim(i).value());
- }
sub->fusedActivationFunction(luci::FusedActFunc::NONE);
+ sub->name(name + "/Sub");
loco::replace(pred).with(sub);
if (!update_conv_bias_with_beta(conv, beta, false))
return false;
+ luci::add_origin(conv, luci::get_origin(sub));
+
auto pred = sub->x();
loco::replace(sub).with(pred);
if (!update_conv_bias_with_beta(conv, beta, true))
return false;
+ luci::add_origin(conv, luci::get_origin(add));
loco::replace(add).with(pred);
add->drop();
if (!update_conv_bias_with_beta(conv, beta, true))
return false;
+ luci::add_origin(conv, luci::get_origin(add));
+
auto relu = *loco::succs(add).begin();
auto relu_node = loco::must_cast<luci::CircleRelu *>(relu);
assert(relu_node != nullptr);
add->drop();
sub_list.push_back(insert_sub(pred, beta));
+ luci::add_origin(sub_list.back(), luci::get_origin(add));
relu_node->features(pred);
// Update CONV weights
update_conv_weights_with_gamma(conv, gamma);
+
+ // Update origin
+ // TODO need to remove const
+ luci::add_origin(const_cast<luci::CircleConv2D *>(conv),
+ luci::get_origin(loco::must_cast<luci::CircleNode *>(mul)));
}
loco::replace(mul).with(pred_node);
if (!is_batchnorm_add(add, mul, beta))
return false;
+ if (add->fusedActivationFunction() != luci::FusedActFunc::RELU)
+ return false;
if (loco::succs(mul).size() != 1)
return false;
return false;
// Insert Relu at the bottom
+ auto name = add->name();
+ assert(name.length() > 0);
+
auto relu = add->graph()->nodes()->create<luci::CircleRelu>();
relu->features(mul);
+ relu->name(name + "/Relu");
+ luci::add_origin(relu, luci::get_origin(add));
loco::replace(add).with(relu);
// Replace beta <- beta / gamma
#include "FusePreActivationBatchNormPassInternal.h"
+#include "luci/Pass/FusePreActivationBatchNormPass.h"
+
#include <luci/IR/CircleNodes.h>
#include <math.h>
conv_filter->at<loco::DataType::FLOAT32>(i * out_size + j) = i * out_size + j;
}
}
+
+ pred_conv->name("pred_conv");
+ pred_conv_filter->name("pred_conv_filter");
+ pred_conv_bias->name("pred_conv_bias");
+ pred_conv2->name("pred_conv2");
+ pred_conv2_filter->name("pred_conv2_filter");
+ pred_conv2_bias->name("pred_conv2_bias");
+ pred_add->name("pred_add");
+ mul->name("mul");
+ mul_gamma->name("mul_gamma");
+ add->name("add");
+ add_beta->name("add_beta");
+ conv->name("conv");
+ conv_filter->name("conv_filter");
+ conv_bias->name("conv_bias");
+ succ_add->name("succ_add");
}
public:
} // namespace
+TEST(FusePreActivationBatchNormPassTest, name)
+{
+ luci::FusePreActivationBatchNormPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(FusePreActivationBatchNorm, swap_mul_add)
{
SimpleGraph g;
#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
+#include "BatchNormPatternFinder.h"
+
#include <luci/IR/CircleNodes.h>
namespace
return changed;
}
-// Check if add is batchnorm add
-bool is_batchnorm_add(const luci::CircleAdd *add)
+bool make_positive_gamma(luci::CircleAdd *add)
{
- auto x = dynamic_cast<luci::CircleConst *>(add->x());
- auto y = dynamic_cast<luci::CircleConst *>(add->y());
-
- luci::CircleConst *constant = nullptr;
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+ luci::CircleConst *gamma = nullptr;
+ luci::CircleNode *pred = nullptr;
- if (x != nullptr && y == nullptr)
- constant = x;
- else if (x == nullptr && y != nullptr)
- constant = y;
- else
+ if (!is_batchnorm_add(add, mul, beta))
return false;
- if (constant->rank() != 1)
+ if (loco::succs(mul).size() != 1)
return false;
+ if (!is_batchnorm_mul(mul, pred, gamma))
+ return false;
+ assert(pred == add);
// Only support Relu
if (add->fusedActivationFunction() != luci::FusedActFunc::RELU)
return false;
- auto channel_dim = constant->dim(0);
- if (!(channel_dim == add->dim(add->rank() - 1)))
- return false;
-
- return true;
-}
-
-// Check if mul is batchnorm mul
-bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleConst *&gamma)
-{
- auto x = dynamic_cast<luci::CircleConst *>(mul->x());
- auto y = dynamic_cast<luci::CircleConst *>(mul->y());
-
- luci::CircleConst *constant = nullptr;
-
- if (x != nullptr && y == nullptr)
- constant = x;
- else if (x == nullptr && y != nullptr)
- constant = y;
- else
- return false;
-
- if (constant->rank() != 1)
- return false;
-
- auto channel_dim = constant->dim(0);
- if (!(channel_dim == mul->dim(mul->rank() - 1)))
- return false;
-
- // Check successor is batchnorm add
- auto succs = loco::succs(mul);
- if (succs.size() != 1)
- return false;
-
- auto add = dynamic_cast<luci::CircleAdd *>(*succs.begin());
- if (add == nullptr)
- return false;
-
- if (!is_batchnorm_add(add))
- return false;
-
- gamma = constant;
- return true;
+ return negative_gamma_to_positive(gamma);
}
} // namespace
namespace luci
{
+/**
+ * Convert negative gamma values of Mul-Add (as BatchNorm) to a small positive value (1e-10)
+ *
+ * PATTERN:
+ *          |
+ *     [CircleNode]  [CircleConst](as gamma)
+ *            \          /
+ *            [CircleMul]  [CircleConst](as beta)
+ *                 \          /
+ *                 [CircleAdd]
+ *                      |
+ */
bool MakeBatchNormGammaPositivePass::run(loco::Graph *g)
{
bool changed = false;
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- auto mul = dynamic_cast<luci::CircleMul *>(node);
- if (mul == nullptr)
+ auto add = dynamic_cast<luci::CircleAdd *>(node);
+ if (add == nullptr)
continue;
- luci::CircleConst *gamma;
- if (is_batchnorm_mul(mul, gamma))
- changed = negative_gamma_to_positive(gamma);
+ if (make_positive_gamma(add))
+ changed = true;
}
return changed;
}
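A minimal sketch of the gamma adjustment this pass applies, using a plain vector instead of a CircleConst (the 1e-10 value follows the comment above; the helper name below is illustrative, not the pass's actual negative_gamma_to_positive):

#include <vector>

// Sketch: replace each negative gamma value with a small positive epsilon and
// report whether anything changed, mirroring what the pass does on the const node.
bool make_gamma_positive(std::vector<float> &gamma)
{
  bool changed = false;
  for (auto &g : gamma)
  {
    if (g < 0.0f)
    {
      g = 1e-10f;
      changed = true;
    }
  }
  return changed;
}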
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
+
+#include <gtest/gtest.h>
+
+TEST(MakeBatchNormGammaPositivePassTest, name)
+{
+ luci::MakeBatchNormGammaPositivePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/MigrateLegacyShapeDtypePass.h"
-
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/TypeInference.h>
-
-#include <luci/IR/CircleNodes.h>
-
-#include <loco.h>
-
-namespace
-{
-
-bool has_same_shape(luci::CircleNode *node, loco::TensorShape shape)
-{
- if (node->rank() != shape.rank())
- return false;
-
- for (uint32_t i = 0; i < shape.rank(); ++i)
- if (!(node->dim(i) == shape.dim(i)))
- return false;
-
- return true;
-}
-
-} // namespace
-
-namespace luci
-{
-
-bool MigrateLegacyShapeDtypePass::run(luci::Module *m)
-{
- bool changed = false;
-
- for (size_t g = 0; g < m->size(); ++g)
- {
- if (run(m->graph(g)))
- changed = true;
- }
-
- return changed;
-}
-
-bool MigrateLegacyShapeDtypePass::run(loco::Graph *g)
-{
- bool changed = false;
-
- for (auto node : loco::all_nodes(g))
- {
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- if (loco::shape_known(node))
- {
- auto loco_shape = loco::shape_get(node).as<loco::TensorShape>();
-
- assert(circle_node->shape_signature().rank() == 0 ||
- circle_node->shape_signature().rank() == loco_shape.rank());
-
- // When shape of loco is copied to circle node, ShapeSignature should be applied.
- loco::TensorShape new_shape;
- new_shape.rank(loco_shape.rank());
- for (uint32_t i = 0; i < loco_shape.rank(); ++i)
- {
- if (circle_node->shape_signature().rank() > 0 &&
- circle_node->shape_signature().dim(i) == -1)
- new_shape.dim(i) = 1;
- else
- new_shape.dim(i) = loco_shape.dim(i);
- }
-
- if (circle_node->shape_status() == luci::ShapeStatus::UNDEFINED ||
- !has_same_shape(circle_node, new_shape))
- {
- circle_node->rank(new_shape.rank());
- for (uint32_t i = 0; i < new_shape.rank(); ++i)
- circle_node->dim(i) = new_shape.dim(i);
-
- if (circle_node->shape_status() == luci::ShapeStatus::UNDEFINED)
- circle_node->shape_status(luci::ShapeStatus::VALID);
-
- changed = true;
- }
- }
-
- if (loco::dtype_known(node))
- {
- if (loco::dtype_get(node) != circle_node->dtype())
- {
- circle_node->dtype(loco::dtype_get(node));
- changed = true;
- }
- }
- }
-
- return changed;
-}
-
-} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModulePhase.h"
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+TEST(ModulePhaseTest, saturate)
+{
+ auto m = luci::make_module();
+ auto g = loco::make_graph();
+ m->add(std::move(g));
+
+ luci::Phase phase;
+
+ // Any Pass will do for testing
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ luci::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{m.get()};
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
+
+TEST(ModulePhaseTest, restart)
+{
+ auto m = luci::make_module();
+ auto g = loco::make_graph();
+ m->add(std::move(g));
+
+ luci::Phase phase;
+
+ // Any Pass will do for testing
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ luci::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{m.get()};
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_GRAPHS_H__
+#define __LUCI_PASS_TEST_GRAPHS_H__
+
+#include <loco.h>
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+/**
+ * ConstantFoldingTestGraph is a base class for testing
+ * constant folding passes. It creates the Input and Output
+ * of the graph below. Child classes must implement the Connector
+ * and the Folded pattern.
+ *
+ *    [Input]   [Folded pattern] (Implemented by child class)
+ *        \         /
+ *       [Connector] (Implemented by child class)
+ *            |
+ *        [Output]
+ *
+ * Connector should satisfy the below conditions
+ * - Input type == Output type == Folded pattern type
+ * - Input shape == Output shape == Folded pattern shape
+ *
+ * For example, Add, Mul, Sub, .. can be a Connector
+ */
+class ConstantFoldingTestGraph
+{
+public:
+ ConstantFoldingTestGraph(std::vector<uint32_t> input_shape, loco::DataType input_dtype)
+ {
+ _input = _g.nodes()->create<luci::CircleInput>();
+ _output = _g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = _g.inputs()->create();
+ _input->index(graph_input->index());
+ auto graph_output = _g.outputs()->create();
+ _output->index(graph_output->index());
+
+ graph_input->dtype(input_dtype);
+ graph_output->dtype(input_dtype);
+ _input->dtype(input_dtype);
+ _output->dtype(input_dtype);
+
+ auto input_tensor_shape = std::make_unique<loco::TensorShape>();
+ input_tensor_shape->rank(input_shape.size());
+ for (int i = 0; i < input_shape.size(); i++)
+ input_tensor_shape->dim(i).set(input_shape[i]);
+ graph_input->shape(std::move(input_tensor_shape));
+
+ auto output_tensor_shape = std::make_unique<loco::TensorShape>();
+ output_tensor_shape->rank(input_shape.size());
+ for (int i = 0; i < input_shape.size(); i++)
+ output_tensor_shape->dim(i).set(input_shape[i]);
+ graph_output->shape(std::move(output_tensor_shape));
+
+ _input->rank(input_shape.size());
+ for (int i = 0; i < input_shape.size(); i++)
+ _input->dim(i).set(input_shape[i]);
+
+ _output->rank(input_shape.size());
+ for (int i = 0; i < input_shape.size(); i++)
+ _output->dim(i).set(input_shape[i]);
+
+ _input->name("input");
+ _output->name("output");
+ }
+
+ virtual void init() = 0;
+
+ virtual ~ConstantFoldingTestGraph() = default;
+
+ virtual loco::Node *createFoldedPattern() = 0;
+
+ virtual luci::CircleConst *getFoldedPattern() = 0;
+
+ loco::Graph *graph() { return &_g; }
+
+ // NOTE these members keep the '_' prefix and are protected so that child test graphs can access them
+protected:
+ loco::Graph _g;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+/**
+ * ConstantFoldingAddTestGraph is a ConstantFoldingTestGraph
+ * whose Connector is Add.
+ */
+class ConstantFoldingAddTestGraph : public ConstantFoldingTestGraph
+{
+protected:
+ ConstantFoldingAddTestGraph(std::vector<uint32_t> input_shape, loco::DataType input_dtype)
+ : ConstantFoldingTestGraph(input_shape, input_dtype)
+ {
+ _add = _g.nodes()->create<luci::CircleAdd>();
+ _add->dtype(input_dtype);
+
+ _add->rank(input_shape.size());
+ for (int i = 0; i < input_shape.size(); i++)
+ _add->dim(i).set(input_shape[i]);
+
+ _add->x(_input);
+
+ _output->from(_add);
+
+ _add->name("add");
+ }
+
+protected:
+ void init() override { _add->y(createFoldedPattern()); }
+
+protected:
+ luci::CircleConst *getFoldedPattern() override
+ {
+ return dynamic_cast<luci::CircleConst *>(_add->y());
+ }
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_GRAPHS_H__
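As a usage sketch (not part of the patch), a unit test would typically derive from ConstantFoldingAddTestGraph and only supply the folded pattern. The class name FoldedConstGraph and the scalar constant below are illustrative assumptions:

// Sketch, assuming the header above is included by the test source.
// The folded pattern here is a single scalar constant feeding the Add connector.
class FoldedConstGraph : public luci::ConstantFoldingAddTestGraph
{
public:
  FoldedConstGraph() : luci::ConstantFoldingAddTestGraph({1}, loco::DataType::FLOAT32) {}

  // re-expose the protected init() so test code can call it directly
  using luci::ConstantFoldingAddTestGraph::init;

  loco::Node *createFoldedPattern() override
  {
    auto c = _g.nodes()->create<luci::CircleConst>();
    c->dtype(loco::DataType::FLOAT32);
    c->rank(1);
    c->dim(0).set(1);
    c->size<loco::DataType::FLOAT32>(1);
    c->at<loco::DataType::FLOAT32>(0) = 1.0f;
    c->name("folded_const");
    return c;
  }
};

A test would construct the graph, call init(), run the folding pass on graph(), and then inspect the folded constant through the ConstantFoldingTestGraph interface.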
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
{
public:
ModuleProgressReporter(luci::Module *module, logo::PhaseStrategy strategy)
- : _module{module}, _strategy{strategy}
+ : _module{module}, _strategy{strategy}
{
// DO NOTHING
}
public:
ConstInputConcatGraph(loco::DataType quant_type)
{
- concat_node.dtype(quant_type);
- concat_node.fusedActivationFunction(luci::FusedActFunc::NONE);
- input_1.dtype(loco::DataType::FLOAT32);
- input_1.size<loco::DataType::FLOAT32>(5);
+ concat_node = g.nodes()->create<luci::CircleConcatenation>(2);
+ input_1 = g.nodes()->create<luci::CircleConst>();
+ input_2 = g.nodes()->create<luci::CircleConv2D>();
+
+ concat_node->dtype(quant_type);
+ concat_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ input_1->dtype(loco::DataType::FLOAT32);
+ input_1->size<loco::DataType::FLOAT32>(5);
for (int i = 0; i < 5; i++)
{
// Set data {-2, -1, 0, 1, 2}
- input_1.at<loco::DataType::FLOAT32>(i) = i - 2.0;
+ input_1->at<loco::DataType::FLOAT32>(i) = i - 2.0;
}
- input_2.dtype(quant_type);
+ input_2->dtype(quant_type);
- concat_node.values(0, &input_1);
- concat_node.values(1, &input_2);
+ concat_node->values(0, input_1);
+ concat_node->values(1, input_2);
if (quant_type == loco::DataType::U8)
{
- addQuantParam(concat_node, {0.1}, {10});
- addQuantParam(input_2, {2.0}, {2});
+ addQuantParam(*concat_node, {0.1}, {10});
+ addQuantParam(*input_2, {2.0}, {2});
}
else if (quant_type == loco::DataType::S16)
{
- addQuantParam(concat_node, {0.1}, {0});
- addQuantParam(input_2, {2.0}, {0});
+ addQuantParam(*concat_node, {0.1}, {0});
+ addQuantParam(*input_2, {2.0}, {0});
}
else
{
}
}
- ~ConstInputConcatGraph()
- {
- concat_node.values(0, nullptr);
- concat_node.values(1, nullptr);
- }
-
public:
- luci::CircleConcatenation concat_node{2};
- luci::CircleConst input_1;
- luci::CircleConv2D input_2;
+ loco::Graph g;
+ luci::CircleConcatenation *concat_node = nullptr;
+ luci::CircleConst *input_1 = nullptr;
+ luci::CircleConv2D *input_2 = nullptr;
};
} // namespace
// input_1 is const. const values are quantized with the qparam of concat
ConstInputConcatGraph cg(loco::DataType::U8);
- luci::propagate_concat_quantparam(&cg.concat_node, loco::DataType::U8);
- EXPECT_FLOAT_EQ(0.1, cg.concat_node.quantparam()->scale[0]);
- EXPECT_EQ(10, cg.concat_node.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.1, cg.input_1.quantparam()->scale[0]);
- EXPECT_EQ(10, cg.input_1.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.1, cg.input_2.quantparam()->scale[0]);
- EXPECT_EQ(10, cg.input_2.quantparam()->zerop[0]);
- EXPECT_EQ(loco::DataType::U8, cg.input_1.dtype());
- EXPECT_EQ(0, cg.input_1.at<loco::DataType::U8>(0));
- EXPECT_EQ(0, cg.input_1.at<loco::DataType::U8>(1));
- EXPECT_EQ(10, cg.input_1.at<loco::DataType::U8>(2));
- EXPECT_EQ(20, cg.input_1.at<loco::DataType::U8>(3));
- EXPECT_EQ(30, cg.input_1.at<loco::DataType::U8>(4));
+ luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::U8);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.1, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(0.1, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::U8, cg_input_1->dtype());
+ EXPECT_EQ(0, cg_input_1->at<loco::DataType::U8>(0));
+ EXPECT_EQ(0, cg_input_1->at<loco::DataType::U8>(1));
+ EXPECT_EQ(10, cg_input_1->at<loco::DataType::U8>(2));
+ EXPECT_EQ(20, cg_input_1->at<loco::DataType::U8>(3));
+ EXPECT_EQ(30, cg_input_1->at<loco::DataType::U8>(4));
}
TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8_NEG)
// concat has fused activation function and input_1 is const.
// const values are quantized using its min/max
ConstInputConcatGraph cg(loco::DataType::U8);
- cg.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU);
- luci::propagate_concat_quantparam(&cg.concat_node, loco::DataType::U8);
- EXPECT_FLOAT_EQ(0.1, cg.concat_node.quantparam()->scale[0]);
- EXPECT_EQ(10, cg.concat_node.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.015686275, cg.input_1.quantparam()->scale[0]);
- EXPECT_EQ(128, cg.input_1.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(2.0, cg.input_2.quantparam()->scale[0]);
- EXPECT_EQ(2, cg.input_2.quantparam()->zerop[0]);
- EXPECT_EQ(loco::DataType::U8, cg.input_1.dtype());
- EXPECT_EQ(quantize(-2, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::U8>(0));
- EXPECT_EQ(quantize(-1, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::U8>(1));
- EXPECT_EQ(quantize(0, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::U8>(2));
- EXPECT_EQ(quantize(1, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::U8>(3));
- EXPECT_EQ(quantize(2, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::U8>(4));
+ cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::U8);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.015686275, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(128, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(2.0, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(2, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::U8, cg_input_1->dtype());
+ EXPECT_EQ(quantize(-2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(0));
+ EXPECT_EQ(quantize(-1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(1));
+ EXPECT_EQ(quantize(0, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(2));
+ EXPECT_EQ(quantize(1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(3));
+ EXPECT_EQ(quantize(2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(4));
}
TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16)
// input_1 is const. const values are quantized with the qparam of concat
ConstInputConcatGraph cg(loco::DataType::S16);
- luci::propagate_concat_quantparam(&cg.concat_node, loco::DataType::S16);
- EXPECT_FLOAT_EQ(0.1, cg.concat_node.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.concat_node.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.1, cg.input_1.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.input_1.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.1, cg.input_2.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.input_2.quantparam()->zerop[0]);
- EXPECT_EQ(loco::DataType::S16, cg.input_1.dtype());
- EXPECT_EQ(-20, cg.input_1.at<loco::DataType::S16>(0));
- EXPECT_EQ(-10, cg.input_1.at<loco::DataType::S16>(1));
- EXPECT_EQ(0, cg.input_1.at<loco::DataType::S16>(2));
- EXPECT_EQ(10, cg.input_1.at<loco::DataType::S16>(3));
- EXPECT_EQ(20, cg.input_1.at<loco::DataType::S16>(4));
+ luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::S16);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.1, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(0.1, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::S16, cg_input_1->dtype());
+ EXPECT_EQ(-20, cg_input_1->at<loco::DataType::S16>(0));
+ EXPECT_EQ(-10, cg_input_1->at<loco::DataType::S16>(1));
+ EXPECT_EQ(0, cg_input_1->at<loco::DataType::S16>(2));
+ EXPECT_EQ(10, cg_input_1->at<loco::DataType::S16>(3));
+ EXPECT_EQ(20, cg_input_1->at<loco::DataType::S16>(4));
}
TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16_NEG)
// concat has fused activation function and input_1 is const.
// const values are quantized using its min/max
ConstInputConcatGraph cg(loco::DataType::S16);
- cg.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU);
- luci::propagate_concat_quantparam(&cg.concat_node, loco::DataType::S16);
- EXPECT_FLOAT_EQ(0.1, cg.concat_node.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.concat_node.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(0.000061037, cg.input_1.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.input_1.quantparam()->zerop[0]);
- EXPECT_FLOAT_EQ(2.0, cg.input_2.quantparam()->scale[0]);
- EXPECT_EQ(0, cg.input_2.quantparam()->zerop[0]);
- EXPECT_EQ(loco::DataType::S16, cg.input_1.dtype());
- EXPECT_EQ(quantize(-2, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::S16>(0));
- EXPECT_EQ(quantize(-1, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::S16>(1));
- EXPECT_EQ(quantize(0, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::S16>(2));
- EXPECT_EQ(quantize(1, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::S16>(3));
- EXPECT_EQ(quantize(2, cg.input_1.quantparam()), cg.input_1.at<loco::DataType::S16>(4));
+ cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::S16);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.000061037, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(2.0, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::S16, cg_input_1->dtype());
+ EXPECT_EQ(quantize(-2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(0));
+ EXPECT_EQ(quantize(-1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(1));
+ EXPECT_EQ(quantize(0, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(2));
+ EXPECT_EQ(quantize(1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(3));
+ EXPECT_EQ(quantize(2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(4));
}
INFO(l) << "PropagateQuantParamPass visit node: " << circle_node->name() << std::endl;
PropagateQuantParam pqp;
- changed = circle_node->accept(&pqp);
- if (changed)
- break;
+ if (circle_node->accept(&pqp))
+ changed = true;
}
return changed;
} // namespace
+TEST(PropagateQuantParamPassTest, name)
+{
+ luci::PropagateQuantParamPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(PropagateQuantParam, simple)
{
SimpleGraph g;
data = data < nudged_min ? nudged_min : data;
data = data > nudged_max ? nudged_max : data;
quantized_values[i] =
- static_cast<int32_t>(std::round((data - nudged_min) * scaling_factor_inv));
+ static_cast<int32_t>(std::round((data - nudged_min) * scaling_factor_inv));
}
node->dtype(loco::DataType::U8); // change the type of tensor
for (uint32_t i = 0; i < size; ++i)
{
node->at<loco::DataType::S16>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
}
void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
float &nudged_min, float &nudged_max)
{
- assert(min != max);
+ assert(min <= max);
const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
const int32_t kMinScale = -kMaxScale;
scale_factor_from_max_side = rmax / qmax_double;
scaling_factor = scale_factor_from_min_side > scale_factor_from_max_side
- ? scale_factor_from_min_side
- : scale_factor_from_max_side;
+ ? scale_factor_from_min_side
+ : scale_factor_from_max_side;
zp = 0;
nudged_min = static_cast<float>(qmin_double * scaling_factor);
nudged_max = static_cast<float>(qmax_double * scaling_factor);
zp = nudged_zero_point;
}
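A quick worked example of the symmetric scale selection shown above, as a standalone sketch with illustrative numbers (assuming the 16-bit symmetric range of +/-32767 used by kMaxScale):

#include <algorithm>
#include <cassert>

int main()
{
  // For min = -3.0 and max = 6.0, the max side wins:
  // scale = max(3/32767, 6/32767) = 6/32767 (~1.83e-4),
  // so the nudged range becomes [-6.0, 6.0] and the zero-point stays 0.
  const double qmax = 32767.0;
  const double rmin = -3.0, rmax = 6.0;
  const double scale = std::max(-rmin / qmax, rmax / qmax);
  assert(scale == rmax / qmax);
  const double nudged_min = -qmax * scale; // -6.0
  const double nudged_max = qmax * scale;  //  6.0
  (void)nudged_min;
  (void)nudged_max;
  return 0;
}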
-bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension, int &channel_dim_index)
+bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
+ int32_t &channel_dim_index)
{
auto succs = loco::succs(node);
uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
{
return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
- dimension.dim(3).value() +
+ dimension.dim(3).value() +
indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
indices[2] * dimension.dim(3).value() + indices[3];
}
float &scaling_factor, int64_t &zp, float &nudged_min,
float &nudged_max);
-bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension, int &channel_dim_index);
+bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
+ int32_t &channel_dim_index);
uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices);
#include <iostream>
#include <cmath>
-
-namespace luci
-{
+#include <functional>
namespace
{
-void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max)
+using namespace luci;
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+void iterate_per_channel(CircleConst *node, IterFunc func)
{
loco::TensorShape dimension;
dimension.rank(4);
uint32_t indices[4] = {
- 0,
+ 0,
};
- int channel_dim_index{0};
- int size{0};
+ int32_t channel_dim_index{0};
if (!get_channel_dim_index(node, dimension, channel_dim_index))
{
assert(false);
return;
}
- size = dimension.dim(channel_dim_index).value();
- std::vector<bool> has_min_max_value(size, false);
- min.resize(size);
- max.resize(size);
for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
{
for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
{
for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
{
- int channel_idx = indices[channel_dim_index];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- if (has_min_max_value[channel_idx])
- {
- min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
- max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
- }
- else
- {
- min[channel_idx] = data;
- max[channel_idx] = data;
- has_min_max_value[channel_idx] = true;
- }
+ func(indices, dimension, channel_dim_index);
}
}
}
}
}
+} // namespace
+
+namespace luci
+{
+
+namespace
+{
+
+void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max)
+{
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ int32_t channel_dim_index{0};
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+ auto size = dimension.dim(channel_dim_index).value();
+
+ std::vector<bool> has_min_max_value(size, false);
+ min.resize(size);
+ max.resize(size);
+
+ auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ if (has_min_max_value[channel_idx])
+ {
+ min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
+ max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
+ }
+ else
+ {
+ min[channel_idx] = data;
+ max[channel_idx] = data;
+ has_min_max_value[channel_idx] = true;
+ }
+ };
+
+ iterate_per_channel(node, cal_minmax);
+}
+
void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
std::vector<float> &scaling_factor, std::vector<int64_t> &zp,
std::vector<float> &nudged_min, std::vector<float> &nudged_max)
compute_sym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
}
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
};
- int channel_dim_index{0};
-
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
- data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round(data * scaling_factor_inv));
- }
- }
- }
- }
+ iterate_per_channel(node, quantize);
node->dtype(loco::DataType::S16); // change the type of tensor
node->size<loco::DataType::S16>(size); // resize tensor
for (uint32_t i = 0; i < size; ++i)
{
node->at<loco::DataType::S16>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
}
uint32_t size = node->size<loco::DataType::S16>();
std::vector<float> dequantized_values(size);
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto dequantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::S16>(cal_offset(dimension, indices));
+ dequantized_values[cal_offset(dimension, indices)] =
+ static_cast<float>(data) * scaling_factor[channel_idx];
};
- int channel_dim_index{0};
-
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- auto data = node->at<loco::DataType::S16>(cal_offset(dimension, indices));
- dequantized_values[cal_offset(dimension, indices)] =
- static_cast<float>(data) * scaling_factor[channel_idx];
- }
- }
- }
- }
+ iterate_per_channel(node, dequantize);
node->dtype(loco::DataType::FLOAT32); // change the type of tensor
node->size<loco::DataType::FLOAT32>(size); // resize tensor
compute_asym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
}
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round((data - nudged_min[channel_idx]) * scaling_factor_inv));
};
- int channel_dim_index{0};
-
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
- data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
- quantized_values[cal_offset(dimension, indices)] = static_cast<int32_t>(
- std::round((data - nudged_min[channel_idx]) * scaling_factor_inv));
- }
- }
- }
- }
+ iterate_per_channel(node, quantize);
node->dtype(loco::DataType::U8); // change the type of tensor
node->size<loco::DataType::U8>(size); // resize tensor
uint32_t size = node->size<loco::DataType::U8>();
std::vector<float> dequantized_values(size);
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto dequantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::U8>(cal_offset(dimension, indices));
+ dequantized_values[cal_offset(dimension, indices)] =
+ static_cast<float>(data) * scaling_factor[channel_idx] + nudged_min[channel_idx];
};
- int channel_dim_index{0};
-
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- auto data = node->at<loco::DataType::U8>(cal_offset(dimension, indices));
- dequantized_values[cal_offset(dimension, indices)] =
- static_cast<float>(data) * scaling_factor[channel_idx] + nudged_min[channel_idx];
- }
- }
- }
- }
+ iterate_per_channel(node, dequantize);
node->dtype(loco::DataType::FLOAT32); // change the type of tensor
node->size<loco::DataType::FLOAT32>(size); // resize tensor
{
QuantizeDequantizeWeights(loco::DataType input, loco::DataType output,
QuantizationGranularity granularity)
- : input_type(input), output_type(output), granularity(granularity)
+ : input_type(input), output_type(output), granularity(granularity)
{
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(QuantizeDequantizeWeightsPassTest, name)
+{
+ luci::QuantizeDequantizeWeightsPass pass(loco::DataType::FLOAT32, loco::DataType::U8,
+ luci::QuantizationGranularity::LayerWise);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/Nodes/CircleConst.h>
#include <luci/Log.h>
#include <oops/UserExn.h>
#include <iostream>
#include <cmath>
+#include <functional>
+
+namespace
+{
+
+using namespace luci;
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func)
+{
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ func(indices, dimension, channel_dim_index);
+ }
+ }
+ }
+ }
+}
+
+} // namespace
namespace luci
{
namespace
{
+// Create a new const node from an existing node.
+// The new node has the following characteristics:
+// type: T
+// shape: same as 'node' (given as an argument)
+// buffer size: 'size' (given as an argument)
+// Note that the contents are not filled in by this function.
+template <loco::DataType T>
+luci::CircleConst *create_empty_const_from(luci::CircleConst *node, uint32_t size)
+{
+ auto new_node = node->graph()->nodes()->create<CircleConst>();
+ // TODO: We don't have any naming convention for quantized nodes yet.
+ // Fix this when we have one.
+ new_node->name(node->name());
+ new_node->dtype(T);
+ new_node->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ new_node->dim(i).set(node->dim(i).value());
+
+ new_node->size<T>(size);
+ new_node->shape_status(luci::ShapeStatus::VALID);
+
+ return new_node;
+}
+
void overwrite_quantparam(luci::CircleConcatenation *concat, luci::CircleNode *target)
{
auto concat_qparam = concat->quantparam();
auto quantparam = std::make_unique<CircleQuantParam>();
target->quantparam(std::move(quantparam));
target_qparam = target->quantparam();
+
+ if (target_qparam == nullptr)
+ throw std::runtime_error("Creating new quant param failed");
}
target_qparam->min = concat_qparam->min;
target_qparam->max = concat_qparam->max;
const_node->size<loco::DataType::S16>(size); // resize tensor
for (uint32_t i = 0; i < size; ++i)
const_node->at<loco::DataType::S16>(i) =
- std::min(32767, std::max(-32767, quantized_values[i]));
+ std::min(32767, std::max(-32767, quantized_values[i]));
break;
default:
throw std::runtime_error("Unsupported data type");
}
// Check if the node is the bias of Conv2D, DepthwiseConv2D, FullyConnected, or TransposeConv layer
-// If true, return <input, weight> pair of the successor node (used to quantize bias)
-// If flase, return <nullptr, nullptr>
-std::pair<loco::Node *, loco::Node *> get_input_weight_of_bias(CircleNode *node)
+// Returns a list of <input, weight, output> triples for the operators above.
+// Note that it returns a 'list' because a single bias can be used by multiple operators.
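+// For example (illustrative only): if the same bias const feeds one Conv2D and one
+// FullyConnected, the result would look like
+//   { {conv->input(), conv->filter(), conv}, {fc->input(), fc->weights(), fc} }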
+std::vector<std::vector<loco::Node *>> get_input_weight_output_of_bias(CircleNode *node)
{
+ std::vector<std::vector<loco::Node *>> result;
auto circle_const = dynamic_cast<CircleConst *>(node);
if (circle_const == nullptr)
- return std::make_pair(nullptr, nullptr);
+ return result;
auto succs = loco::succs(node);
- if (succs.size() != 1) // assume bias is used by only one node
- return std::make_pair(nullptr, nullptr);
for (auto out : succs)
{
{
assert(conv->input() != nullptr);
assert(conv->filter() != nullptr);
- return std::make_pair(conv->input(), conv->filter());
+ result.push_back({conv->input(), conv->filter(), conv});
+ continue;
}
auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
if (dw_conv != nullptr && dw_conv->bias() == circle_const)
{
assert(dw_conv->input() != nullptr);
assert(dw_conv->filter() != nullptr);
- return std::make_pair(dw_conv->input(), dw_conv->filter());
+ result.push_back({dw_conv->input(), dw_conv->filter(), dw_conv});
+ continue;
}
auto fc = dynamic_cast<CircleFullyConnected *>(out);
if (fc != nullptr && fc->bias() == circle_const)
{
assert(fc->input() != nullptr);
assert(fc->weights() != nullptr);
- return std::make_pair(fc->input(), fc->weights());
+ result.push_back({fc->input(), fc->weights(), fc});
+ continue;
}
auto tconv = dynamic_cast<CircleTransposeConv *>(out);
if (tconv != nullptr && tconv->bias() == circle_const)
{
assert(tconv->outBackprop() != nullptr);
assert(tconv->filter() != nullptr);
- return std::make_pair(tconv->outBackprop(), tconv->filter());
+ result.push_back({tconv->outBackprop(), tconv->filter(), tconv});
+ continue;
}
}
- return std::make_pair(nullptr, nullptr);
+ return result;
}
-void asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
- float *scaling_factor, int64_t *zp)
+CircleConst *asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
+ float *scaling_factor, int64_t *zp)
{
float scale = input_scale * weight_scale;
const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
for (uint32_t i = 0; i < size; ++i)
{
quantized_values[i] =
- static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
}
- node->dtype(loco::DataType::S32); // change the type of tensor
- node->size<loco::DataType::S32>(size); // resize tensor
+ auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
+
const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
for (uint32_t i = 0; i < size; ++i)
{
- node->at<loco::DataType::S32>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ new_bias->at<loco::DataType::S32>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
*scaling_factor = scale;
*zp = 0;
+
+ return new_bias;
}
-void quant_bias_per_channel(CircleConst *node, float input_scale, std::vector<float> &weight_scale,
- std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
+CircleConst *quant_bias_per_channel(CircleConst *node, float input_scale,
+ std::vector<float> &weight_scale,
+ std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
{
float scaling_factor_inv{0};
scaling_factor[i] = input_scale * weight_scale[i];
scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
quantized_values[i] =
- static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
zp[i] = 0;
}
- node->dtype(loco::DataType::S32); // change the type of tensor
- node->size<loco::DataType::S32>(size); // resize tensor
+ auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
+
const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
for (uint32_t i = 0; i < size; ++i)
{
- node->at<loco::DataType::S32>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ new_bias->at<loco::DataType::S32>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
+
+ return new_bias;
}
-void int16_quant_bias_per_channel(CircleConst *node, float input_scale,
- std::vector<float> &weight_scale,
- std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
+CircleConst *int16_quant_bias_per_channel(CircleConst *node, float input_scale,
+ std::vector<float> &weight_scale,
+ std::vector<float> &scaling_factor,
+ std::vector<int64_t> &zp)
{
float scaling_factor_inv{0};
scaling_factor[i] = input_scale * weight_scale[i];
scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
quantized_values[i] =
- static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
zp[i] = 0;
}
- node->dtype(loco::DataType::S64); // change the type of tensor
- node->size<loco::DataType::S64>(size); // resize tensor
+ auto new_bias = create_empty_const_from<loco::DataType::S64>(node, size);
+
for (uint32_t i = 0; i < size; ++i)
{
- node->at<loco::DataType::S64>(i) = quantized_values[i];
+ new_bias->at<loco::DataType::S64>(i) = quantized_values[i];
}
+
+ return new_bias;
}
bool has_min_max(const CircleNode *node)
uint32_t size = node->size<loco::DataType::FLOAT32>();
std::vector<int32_t> quantized_values(size);
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
};
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
-
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round(data * scaling_factor_inv));
- }
- }
- }
- }
+ iterate_per_channel(node, channel_dim_index, quantize);
node->dtype(loco::DataType::S16); // change the type of tensor
node->size<loco::DataType::S16>(size); // resize tensor
for (uint32_t i = 0; i < size; ++i)
{
node->at<loco::DataType::S16>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
}
uint32_t size = node->size<loco::DataType::FLOAT32>();
std::vector<int32_t> quantized_values(size);
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
};
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
-
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
- }
- }
- }
- }
+ iterate_per_channel(node, channel_dim_index, quantize);
node->dtype(loco::DataType::U8); // change the type of tensor
node->size<loco::DataType::U8>(size); // resize tensor
}
}
+void set_bias(luci::CircleNode *node, luci::CircleConst *bias)
+{
+ if (auto conv = dynamic_cast<CircleConv2D *>(node))
+ conv->bias(bias);
+ else if (auto dconv = dynamic_cast<CircleDepthwiseConv2D *>(node))
+ dconv->bias(bias);
+ else if (auto tconv = dynamic_cast<CircleTransposeConv *>(node))
+ tconv->bias(bias);
+ else if (auto fc = dynamic_cast<CircleFullyConnected *>(node))
+ fc->bias(bias);
+ else
+ throw std::runtime_error("Only convolution, depthwise convolution, transposed convolution, and "
+                             "fully-connected layers have bias");
+}
+
/**
* @brief QuantizeActivation quantizes tensors for activations
* @details Quantize using recorded min/max values
struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
{
QuantizeActivation(loco::DataType input, loco::DataType output)
- : input_type(input), output_type(output)
+ : input_type(input), output_type(output)
{
}
continue;
// Check if this is bias (bias is quantized later)
- auto iw = get_input_weight_of_bias(circle_node);
- if (iw.first != nullptr && iw.second != nullptr)
+ auto iwo = get_input_weight_output_of_bias(circle_node);
+ if (iwo.size() > 0)
+ continue;
+
+ // Check if this is bool type (bool type is not quantized)
+ if (circle_node->dtype() == loco::DataType::BOOL)
continue;
// Check if this is activation
struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool>
{
QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
- : input_type(input), output_type(output), granularity(gr)
+ : input_type(input), output_type(output), granularity(gr)
{
}
if (is_quantized(node))
return false;
- // Check if this is bias
- auto iw = get_input_weight_of_bias(node);
- if (iw.first == nullptr || iw.second == nullptr)
- return false;
-
- auto input = loco::must_cast<luci::CircleNode *>(iw.first);
- auto weight = loco::must_cast<luci::CircleNode *>(iw.second);
+ auto iwo_list = get_input_weight_output_of_bias(node);
- if (granularity == QuantizationGranularity::ChannelWise)
+ for (auto iwo : iwo_list)
{
- assert(input->quantparam()->scale.size() == 1); // input scale's layer-wise
- auto input_scale = input->quantparam()->scale[0];
+ assert(iwo.size() == 3);
- assert(weight->quantparam() != nullptr); // weight scale's channel-wise
- auto weight_scale = weight->quantparam()->scale;
+ auto input = loco::must_cast<luci::CircleNode *>(iwo[0]);
+ auto weight = loco::must_cast<luci::CircleNode *>(iwo[1]);
+ auto output = loco::must_cast<luci::CircleNode *>(iwo[2]);
- auto circle_const = loco::must_cast<luci::CircleConst *>(node);
+ auto const_bias = loco::must_cast<luci::CircleConst *>(node);
+ assert(const_bias->dtype() == loco::DataType::FLOAT32);
- uint32_t size = circle_const->size<loco::DataType::FLOAT32>();
- assert(size == weight_scale.size());
- std::vector<float> scaling_factor(size);
- std::vector<int64_t> zp(size);
+ CircleConst *new_bias = nullptr;
- if (output_type == loco::DataType::U8)
- {
- quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
- }
- else if (output_type == loco::DataType::S16)
+ if (granularity == QuantizationGranularity::ChannelWise)
{
- int16_quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
+        assert(input->quantparam()->scale.size() == 1); // input scale is layer-wise
+ auto input_scale = input->quantparam()->scale[0];
+
+        assert(weight->quantparam() != nullptr); // weight scale is channel-wise
+ auto weight_scale = weight->quantparam()->scale;
+
+ uint32_t size = const_bias->size<loco::DataType::FLOAT32>();
+ assert(size == weight_scale.size());
+ std::vector<float> scaling_factor(size);
+ std::vector<int64_t> zp(size);
+
+ if (output_type == loco::DataType::U8)
+ {
+ new_bias =
+ quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
+ }
+ else if (output_type == loco::DataType::S16)
+ {
+ new_bias =
+ int16_quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported quantization type.");
+ }
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
+ new_bias->quantparam(std::move(quantparam));
+
+ set_bias(output, new_bias);
}
else
{
- throw std::runtime_error("Unsupported quantization type.");
- }
+ assert(input->quantparam()->scale.size() == 1); // Only support per-layer quant
+ auto input_scale = input->quantparam()->scale[0];
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->scale = scaling_factor;
- quantparam->zerop = zp;
- assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
- circle_const->quantparam(std::move(quantparam));
- }
- else
- {
- assert(input->quantparam()->scale.size() == 1); // Only support per-layer quant
- auto input_scale = input->quantparam()->scale[0];
-
- assert(weight->quantparam()->scale.size() == 1); // Only support per-layer quant
- auto weight_scale = weight->quantparam()->scale[0];
-
- auto circle_const = loco::must_cast<luci::CircleConst *>(node);
- float scaling_factor{0};
- int64_t zp{0};
- asym_quant_bias_per_layer(circle_const, input_scale, weight_scale, &scaling_factor, &zp);
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->scale.push_back(scaling_factor);
- quantparam->zerop.push_back(zp);
- assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
- circle_const->quantparam(std::move(quantparam));
+ assert(weight->quantparam()->scale.size() == 1); // Only support per-layer quant
+ auto weight_scale = weight->quantparam()->scale[0];
+
+ float scaling_factor{0};
+ int64_t zp{0};
+ new_bias =
+ asym_quant_bias_per_layer(const_bias, input_scale, weight_scale, &scaling_factor, &zp);
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
+ new_bias->quantparam(std::move(quantparam));
+
+ set_bias(output, new_bias);
+ }
}
return false;
}
struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
{
QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
- : input_type(input), output_type(output), granularity(gr)
+ : input_type(input), output_type(output), granularity(gr)
{
}
loco::DataType output_type;
QuantizationGranularity granularity;
- // Quantize input tensors of each node
- bool visit(luci::CircleNode *node)
+private:
+ void quantize_weights(luci::CircleConst *weights)
{
- LOGGER(l);
- INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
- auto arity = node->arity();
- for (uint32_t i = 0; i < arity; i++)
+    // Find min/max per channel
+ if (granularity == QuantizationGranularity::ChannelWise)
{
- auto input_node = node->arg(i);
- auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+ auto quantparam = weights->quantparam();
+ if (quantparam == nullptr)
+ {
+ assert(false && "quantparam is nullptr");
+ return;
+ }
- // Check if this is already quantized
- if (is_quantized(circle_node))
- continue;
+ auto min = quantparam->min;
+ auto scaling_factor = quantparam->scale;
+ int32_t channel_dim_index = 0;
- if (is_weights(circle_node))
+ if (output_type == loco::DataType::U8)
{
- auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
-
- // Find min/max per channel-wise
- if (granularity == QuantizationGranularity::ChannelWise)
- {
- auto quantparam = circle_node->quantparam();
- if (quantparam == nullptr)
- {
- assert(false && "quantparam is nullptr");
- return false;
- }
-
- auto min = quantparam->min;
- auto scaling_factor = quantparam->scale;
- int32_t channel_dim_index = 0;
-
- if (output_type == loco::DataType::U8)
- {
- asym_wquant_per_channel(circle_const, min, scaling_factor, channel_dim_index);
- }
- else
- {
- sym_wquant_per_channel(circle_const, scaling_factor, channel_dim_index);
- }
- quantparam->min.clear();
- quantparam->max.clear();
- quantparam->quantized_dimension = channel_dim_index;
- }
- // Find min/max per layer-wise
- else
- {
- // Quantize using recorded quantparam
- auto quantparam = circle_node->quantparam();
- assert(quantparam != nullptr);
- assert(quantparam->min.size() == 1); // only support layer-wise quant
- assert(quantparam->scale.size() == 1); // only support layer-wise quant
- auto min = quantparam->min[0];
- auto scaling_factor = quantparam->scale[0];
- asym_wquant_per_layer(circle_const, min, scaling_factor);
- quantparam->min.clear();
- quantparam->max.clear();
- }
+ asym_wquant_per_channel(weights, min, scaling_factor, channel_dim_index);
+ }
+ else
+ {
+ sym_wquant_per_channel(weights, scaling_factor, channel_dim_index);
}
+ quantparam->min.clear();
+ quantparam->max.clear();
+ quantparam->quantized_dimension = channel_dim_index;
+ }
+    // Find min/max per layer
+ else
+ {
+ // Quantize using recorded quantparam
+ auto quantparam = weights->quantparam();
+ assert(quantparam != nullptr);
+ assert(quantparam->min.size() == 1); // only support layer-wise quant
+ assert(quantparam->scale.size() == 1); // only support layer-wise quant
+ auto min = quantparam->min[0];
+ auto scaling_factor = quantparam->scale[0];
+ asym_wquant_per_layer(weights, min, scaling_factor);
+ quantparam->min.clear();
+ quantparam->max.clear();
}
- return false;
}
-};
-void quant_instnorm(luci::CircleInstanceNorm *node, loco::DataType output_type,
- QuantizationGranularity granularity)
-{
- auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
- auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
- assert(gamma->dtype() == loco::DataType::FLOAT32);
- assert(beta->dtype() == loco::DataType::FLOAT32);
+ bool visit(luci::CircleConv2D *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
- if (granularity == QuantizationGranularity::LayerWise)
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ return true;
+ }
+ return false;
+ }
+
+ bool visit(luci::CircleDepthwiseConv2D *node)
{
- quant_const(gamma, output_type);
- quant_const(beta, output_type);
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ return true;
+ }
+ return false;
}
- else if (granularity == QuantizationGranularity::ChannelWise)
+
+ bool visit(luci::CircleInstanceNorm *node)
{
- quant_const_per_channel(gamma, output_type);
- quant_const_per_channel(beta, output_type);
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+
+ auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
+ auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
+
+ bool changed = false;
+ if (!is_quantized(gamma))
+ {
+ assert(gamma->dtype() == loco::DataType::FLOAT32);
+ auto new_gamma = luci::clone(gamma);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_gamma, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_gamma, output_type);
+ node->gamma(new_gamma);
+ changed = true;
+ }
+ if (!is_quantized(beta))
+ {
+ assert(beta->dtype() == loco::DataType::FLOAT32);
+ auto new_beta = luci::clone(beta);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_beta, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_beta, output_type);
+ node->beta(new_beta);
+ changed = true;
+ }
+
+ return changed;
}
- else
- throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
-}
-void quant_prelu(luci::CirclePRelu *node, loco::DataType output_type,
- QuantizationGranularity granularity)
-{
- auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
- assert(alpha->dtype() == loco::DataType::FLOAT32);
+ bool visit(luci::CirclePRelu *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+
+ auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
+
+ if (!is_quantized(alpha))
+ {
+ assert(alpha->dtype() == loco::DataType::FLOAT32);
+ auto new_alpha = luci::clone(alpha);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_alpha, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_alpha, output_type);
+ node->alpha(new_alpha);
+ return true;
+ }
- if (granularity == QuantizationGranularity::LayerWise)
+ return false;
+ }
+
+ bool visit(luci::CircleTransposeConv *node)
{
- quant_const(alpha, output_type);
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ return true;
+ }
+ return false;
}
- else if (granularity == QuantizationGranularity::ChannelWise)
+
+ bool visit(luci::CircleFullyConnected *node)
{
- quant_const_per_channel(alpha, output_type);
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->weights());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->weights(new_weights);
+ quantize_weights(new_weights);
+ return true;
+ }
+ return false;
}
- else
- throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
-}
+
+ bool visit(luci::CircleNode *) { return false; }
+};
/**
* @brief Quantize const input tensors using min/max of const values
*/
-void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type,
- QuantizationGranularity granularity)
+void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
{
auto opcode = node->opcode();
auto arity = node->arity();
case luci::CircleOpcode::CONV_2D:
case luci::CircleOpcode::DEPTHWISE_CONV_2D:
case luci::CircleOpcode::FULLY_CONNECTED:
+ case luci::CircleOpcode::INSTANCE_NORM:
+ case luci::CircleOpcode::PRELU:
case luci::CircleOpcode::TRANSPOSE_CONV:
// Handled in QuantizeWeights and QuantizeBias
break;
// Handled in propagate_concat_quantparam
break;
+ case luci::CircleOpcode::LOGICAL_OR:
+ // Inputs of logical Ops are bool, thus not quantized
+ break;
+
case luci::CircleOpcode::ARG_MAX:
case luci::CircleOpcode::ARG_MIN:
+ case luci::CircleOpcode::BATCH_TO_SPACE_ND:
case luci::CircleOpcode::MEAN:
case luci::CircleOpcode::PAD:
case luci::CircleOpcode::REDUCE_ANY:
case luci::CircleOpcode::RESIZE_BILINEAR:
case luci::CircleOpcode::RESIZE_NEAREST_NEIGHBOR:
case luci::CircleOpcode::REVERSE_SEQUENCE:
+ case luci::CircleOpcode::SLICE:
+ case luci::CircleOpcode::SPACE_TO_BATCH_ND:
+ case luci::CircleOpcode::STRIDED_SLICE:
case luci::CircleOpcode::SUM:
case luci::CircleOpcode::TILE:
case luci::CircleOpcode::TOPK_V2:
// Ex: axis, paddings
input_node = node->arg(0);
const_node = dynamic_cast<luci::CircleConst *>(input_node);
- if (const_node != nullptr)
+ if (const_node != nullptr && !is_quantized(const_node))
quant_const(const_node, output_type);
break;
- case luci::CircleOpcode::INSTANCE_NORM:
- quant_instnorm(loco::must_cast<luci::CircleInstanceNorm *>(node), output_type, granularity);
- break;
-
- case luci::CircleOpcode::PRELU:
- quant_prelu(loco::must_cast<luci::CirclePRelu *>(node), output_type, granularity);
- break;
-
case luci::CircleOpcode::ADD:
case luci::CircleOpcode::ADD_N:
+ case luci::CircleOpcode::DEPTH_TO_SPACE:
case luci::CircleOpcode::DIV:
+ case luci::CircleOpcode::ELU:
case luci::CircleOpcode::EQUAL:
+ case luci::CircleOpcode::FLOOR:
+ case luci::CircleOpcode::FLOOR_DIV:
case luci::CircleOpcode::GREATER:
case luci::CircleOpcode::GREATER_EQUAL:
case luci::CircleOpcode::LESS:
case luci::CircleOpcode::LESS_EQUAL:
+ case luci::CircleOpcode::LOGISTIC:
case luci::CircleOpcode::MAXIMUM:
case luci::CircleOpcode::MINIMUM:
case luci::CircleOpcode::MUL:
case luci::CircleOpcode::NOT_EQUAL:
+ case luci::CircleOpcode::POW:
+ case luci::CircleOpcode::RSQRT:
+ case luci::CircleOpcode::SOFTMAX:
+ case luci::CircleOpcode::SPACE_TO_DEPTH:
+ case luci::CircleOpcode::SQRT:
case luci::CircleOpcode::SUB:
+ case luci::CircleOpcode::TANH:
// Quantize all const inputs using their values
for (uint32_t i = 0; i < arity; i++)
{
input_node = node->arg(i);
const_node = dynamic_cast<luci::CircleConst *>(input_node);
- if (const_node != nullptr)
+ if (const_node != nullptr && !is_quantized(const_node))
quant_const(const_node, output_type);
}
break;
+ case luci::CircleOpcode::SPLIT:
+ // Only the second input is quantized
+ // First input should not be quantized (e.g., split_dim)
+ input_node = node->arg(1);
+ const_node = dynamic_cast<luci::CircleConst *>(input_node);
+ if (const_node != nullptr && !is_quantized(const_node))
+ quant_const(const_node, output_type);
+ break;
+
default:
for (uint32_t i = 0; i < arity; i++)
{
* (U8 qparam2)
*
* AFTER
- * [CircleNode] [CircleConst]
- * (U8 qparam2) (U8 qparam2)
+ * [CircleNode] [CircleConst] [CircleConst] <- Dead node
+ * (U8 qparam2) (U8 qparam2) (FP32)
* \ /
* \ /
* [CircleConcatenation]
auto node = concat->arg(i);
auto const_node = dynamic_cast<luci::CircleConst *>(node);
if (const_node != nullptr)
- quant_const(const_node, quant_type);
+ {
+ auto new_const = luci::clone(const_node);
+ quant_const(new_const, quant_type);
+ concat->values(i, new_const);
+ }
}
return;
}
if (node->opcode() == luci::CircleOpcode::CONCATENATION)
continue;
- // Skip if this input is used by other Ops
- auto succs = loco::succs(node);
- if (succs.size() != 1)
- {
- if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
- {
- luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
- quant_const(const_node, quant_type);
- }
- continue;
- }
-
- assert(succs.find(concat) != succs.end());
-
// Quantize constant values
if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
{
const auto scaling_factor = concat_qparam->scale[0];
const auto zerop = concat_qparam->zerop[0];
- quant_const_values(const_node, scaling_factor, zerop, quant_type);
+ auto new_const = luci::clone(const_node);
+ quant_const_values(new_const, scaling_factor, zerop, quant_type);
+ concat->values(i, new_const);
+ overwrite_quantparam(concat, new_const);
}
else
{
+ const auto succs = loco::succs(node);
+ if (succs.size() > 1)
+ continue;
+
// Non-const input must have been quantized
assert(node->quantparam() != nullptr);
+ overwrite_quantparam(concat, node);
}
-
- overwrite_quantparam(concat, node);
}
}
circle_node->accept(&qb);
}
- // Quantize const inputs other than weights and bias
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- quantize_const_inputs(circle_node, _output_dtype, _granularity);
- }
-
// Propagate quantization parameters of concat Op
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
propagate_concat_quantparam(concat, _output_dtype);
}
+ // Quantize const inputs other than weights and bias
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ quantize_const_inputs(circle_node, _output_dtype);
+ }
+
// Update output dtype
auto graph_outputs = g->outputs();
for (auto node : loco::output_nodes(g))
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(QuantizeWithMinMaxPassTest, name)
+{
+ luci::QuantizeWithMinMaxPass pass(loco::DataType::FLOAT32, loco::DataType::U8,
+ luci::QuantizationGranularity::LayerWise);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizedModelVerifier.h"
+
+#include "VerifyQuantizedNodeLayerWiseGranularity.h"
+#include "VerifyQuantizedNodeChannelWiseGranularity.h"
+#include "VerifyQuantizedNodeU8Type.h"
+#include "VerifyQuantizedNodeS16Type.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+void QuantizedModelVerifier::verify(loco::Graph *g)
+{
+ if (_quantized_dtype != Type::U8 && _quantized_dtype != Type::S16)
+ throw std::runtime_error("Unsupported quantized dtype");
+
+ if (_granularity != Granularity::ChannelWise && _granularity != Granularity::LayerWise)
+ throw std::runtime_error("Unsupported granularity");
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+
+ // Verify Type
+ if (_quantized_dtype == Type::U8)
+ {
+ VerifyQuantizedNodeU8Type vt;
+ if (!circle_node->accept(&vt))
+ throw std::runtime_error("Wrong data type");
+ }
+ else if (_quantized_dtype == Type::S16)
+ {
+ VerifyQuantizedNodeS16Type vt;
+ if (!circle_node->accept(&vt))
+ throw std::runtime_error("Wrong data type");
+ }
+
+ // Verify Granularity
+ if (_granularity == Granularity::LayerWise)
+ {
+ VerifyQuantizedNodeLayerWiseGranularity vg;
+ if (!circle_node->accept(&vg))
+ throw std::runtime_error("Wrong granularity");
+ }
+ else if (_granularity == Granularity::ChannelWise)
+ {
+ VerifyQuantizedNodeChannelWiseGranularity vg;
+ if (!circle_node->accept(&vg))
+ throw std::runtime_error("Wrong granularity");
+ }
+ }
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZED_MODEL_VERIFIER_H__
+#define __LUCI_QUANTIZED_MODEL_VERIFIER_H__
+
+#include "luci/Pass/QuantizationParameters.h"
+
+#include <loco.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to verify a quantized model
+ *
+ * TODO Move this to luci/service
+ */
+struct QuantizedModelVerifier
+{
+
+public:
+ QuantizedModelVerifier(loco::DataType quantized_dtype, QuantizationGranularity granularity)
+ : _quantized_dtype(quantized_dtype), _granularity(granularity)
+ {
+ }
+
+ void verify(loco::Graph *g);
+
+private:
+ loco::DataType _quantized_dtype;
+ QuantizationGranularity _granularity;
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZED_MODEL_VERIFIER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizedModelVerifier.h"
+
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+using Type = loco::DataType;
+using Granularity = luci::QuantizationGranularity;
+
+namespace
+{
+
+/**
+ * @brief A helper function to create a dummy const node
+ */
+template <Type T> luci::CircleConst *create_dummy_const(loco::Graph *g, luci::test::ShapeU32 shape)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ {
+ node->dtype(T);
+ node->shape(shape);
+ node->size<T>(luci::test::num_elements(shape));
+
+ for (int32_t i = 0; i < luci::test::num_elements(shape); i++)
+ {
+ // DESIGN NOTE
+ //
+      // Filling with any random numbers is fine
+      // Q. Should it include negative numbers?
+ switch (T)
+ {
+ case Type::FLOAT32:
+ // Fill with index
+ node->at<T>(i) = static_cast<float>(i);
+ break;
+ case Type::BOOL:
+ // Fill by flip
+ node->at<T>(i) = (i % 2) ? true : false;
+ break;
+ case Type::U8:
+ // Fill with index
+ node->at<T>(i) = static_cast<uint8_t>(i);
+ break;
+ case Type::S16:
+ // Fill with index
+ node->at<T>(i) = static_cast<int16_t>(i);
+ break;
+ }
+ }
+ }
+
+ return node;
+}
+
+/**
+ * @brief A helper function to create a const node with given values
+ */
+template <Type DT, typename T>
+luci::CircleConst *create_const(loco::Graph *g, luci::test::ShapeU32 shape,
+ std::initializer_list<T> values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ {
+ node->dtype(DT);
+ node->shape(shape);
+ node->size<DT>(luci::test::num_elements(shape));
+
+ assert(values.size() == node->size<DT>());
+
+ uint32_t index = 0;
+ for (auto val : values)
+ {
+ node->at<DT>(index++) = static_cast<T>(val);
+ }
+ }
+
+ return node;
+}
+
+void insert_scale_zp(luci::CircleNode *node, float scale, int64_t zp)
+{
+ auto qparam = node->quantparam();
+ assert(qparam != nullptr); // FIX_CALLER_UNLESS
+ qparam->scale.push_back(scale);
+ qparam->zerop.push_back(zp);
+}
+
+void quantize_and_verify(loco::Graph *g, Type quantized_dtype, Granularity granularity)
+{
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
+ pass.run(g);
+
+ luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ verifier.verify(g);
+}
+
+// Helper function to reduce duplicate test code
+// Assumption: g->output()->from() is the target node
+void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
+ Granularity granularity, Type wrong_dtype)
+{
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
+ pass.run(g->g());
+
+ auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
+ node->dtype(wrong_dtype);
+
+ luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ verifier.verify(g->g());
+}
+
+// Helper function to reduce duplicate test code
+// Assumption: g->output()->from() is the target node
+void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
+ Granularity granularity)
+{
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
+ pass.run(g->g());
+
+ auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
+ insert_scale_zp(node, 1.0, 1);
+
+ luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ verifier.verify(g->g());
+}
+
+// Helper function to reduce duplicate test code
+void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
+ Granularity granularity, luci::CircleNode *target)
+{
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
+ pass.run(g->g());
+
+ insert_scale_zp(target, 1.0, 1);
+
+ luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ verifier.verify(g->g());
+}
+
+// Set min/max for all non-const nodes in the graph
+void set_minmax_to_non_const(loco::Graph *g, float min, float max)
+{
+ for (auto node : loco::all_nodes(g))
+ {
+ auto const_node = dynamic_cast<luci::CircleConst *>(node);
+ if (const_node != nullptr)
+ continue;
+
+ // Min/Max is not recorded for ArgMax
+ // See MinMaxObserver.cpp in record_minmax module
+ auto argmax_node = dynamic_cast<luci::CircleArgMax *>(node);
+ if (argmax_node != nullptr)
+ continue;
+
+ // Min/Max is not recorded for Split
+ // See MinMaxObserver.cpp in record_minmax module
+ auto split_node = dynamic_cast<luci::CircleSplit *>(node);
+ if (split_node != nullptr)
+ continue;
+
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ {
+ qparam->min.emplace_back(min);
+ qparam->max.emplace_back(max);
+ }
+ circle_node->quantparam(std::move(qparam));
+ }
+}
+
+/**
+ * @brief Simple Test Graph
+ * @note
+ * The simple test graph's nodes are initialized with
+ * simple shapes and values.
+ */
+class SimpleTestGraph : public luci::test::TestIOGraph
+{
+public:
+ virtual void init(void) = 0;
+};
+
+class InstanceNormTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _gamma = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _beta = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _instnorm = g()->nodes()->create<luci::CircleInstanceNorm>();
+ {
+ _instnorm->input(input());
+ _instnorm->gamma(_gamma);
+ _instnorm->beta(_beta);
+ }
+ output()->from(_instnorm);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ loco::Node *gamma(void) const { return _instnorm->gamma(); }
+ loco::Node *beta(void) const { return _instnorm->beta(); }
+
+public:
+ luci::CircleInstanceNorm *_instnorm = nullptr;
+ luci::CircleConst *_input = nullptr;
+ luci::CircleConst *_gamma = nullptr;
+ luci::CircleConst *_beta = nullptr;
+};
+
+class LogisticTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _logistic = g()->nodes()->create<luci::CircleLogistic>();
+ {
+ _logistic->x(input());
+ }
+ output()->from(_logistic);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleLogistic *_logistic = nullptr;
+};
+
+class SoftmaxTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _softmax = g()->nodes()->create<luci::CircleSoftmax>();
+ {
+ _softmax->logits(input());
+ _softmax->beta(0.1);
+ }
+ output()->from(_softmax);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSoftmax *_softmax = nullptr;
+};
+
+class SpaceToBatchNDTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 2, 2, 1}, {4, 1, 1, 1});
+ _block_shape = create_dummy_const<Type::S32>(g(), {2});
+ for (uint32_t i = 0; i < 2; i++)
+ _block_shape->at<Type::S32>(i) = 2;
+
+ _paddings = create_dummy_const<Type::S32>(g(), {2, 2});
+ for (uint32_t i = 0; i < 4; i++)
+ _paddings->at<Type::S32>(i) = 0;
+
+ _stob = g()->nodes()->create<luci::CircleSpaceToBatchND>();
+ {
+ _stob->input(input());
+ _stob->block_shape(_block_shape);
+ _stob->paddings(_paddings);
+ }
+ output()->from(_stob);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSpaceToBatchND *_stob = nullptr;
+ luci::CircleConst *_block_shape = nullptr;
+ luci::CircleConst *_paddings = nullptr;
+};
+
+class SpaceToDepthTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 2, 2, 1}, {1, 1, 1, 4});
+ _stod = g()->nodes()->create<luci::CircleSpaceToDepth>();
+ {
+ _stod->input(input());
+ _stod->block_size(2);
+ }
+ output()->from(_stod);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSpaceToDepth *_stod = nullptr;
+};
+
+template <Type indexT> class SliceTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _begin = g()->nodes()->create<luci::CircleConst>();
+ {
+ _begin->dtype(indexT);
+ }
+ _size = g()->nodes()->create<luci::CircleConst>();
+ {
+ _size->dtype(indexT);
+ }
+ _slice = g()->nodes()->create<luci::CircleSlice>();
+ {
+ _slice->input(input());
+ _slice->begin(_begin);
+ _slice->size(_size);
+ }
+ output()->from(_slice);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSlice *_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_size = nullptr;
+};
+
+class SplitTestGraph final : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1, 32}, {32});
+ _split_dim = create_dummy_const<Type::S32>(g(), {1});
+ _split = g()->nodes()->create<luci::CircleSplit>();
+ {
+ _split->input(input());
+ _split->split_dim(_split_dim);
+ }
+ _split_o1 = g()->nodes()->create<luci::CircleSplitOut>();
+ {
+ _split_o1->input(_split);
+ _split_o1->index(0);
+ }
+
+ output()->from(_split_o1);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSplit *_split = nullptr;
+ luci::CircleSplitOut *_split_o1 = nullptr;
+ luci::CircleConst *_split_dim = nullptr;
+};
+
+class StridedSliceTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _begin = g()->nodes()->create<luci::CircleConst>();
+ {
+ _begin->dtype(Type::S32);
+ }
+ _end = g()->nodes()->create<luci::CircleConst>();
+ {
+ _end->dtype(Type::S32);
+ }
+ _strides = g()->nodes()->create<luci::CircleConst>();
+ {
+ _strides->dtype(Type::S32);
+ }
+ _slice = g()->nodes()->create<luci::CircleStridedSlice>();
+ {
+ _slice->input(input());
+ _slice->begin(_begin);
+ _slice->end(_end);
+ _slice->strides(_strides);
+ }
+ output()->from(_slice);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleStridedSlice *_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_end = nullptr;
+ luci::CircleConst *_strides = nullptr;
+};
+
+class ReshapeTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _shape = g()->nodes()->create<luci::CircleConst>();
+ {
+ _shape->dtype(Type::S32);
+ }
+ _reshape = g()->nodes()->create<luci::CircleReshape>();
+ {
+ _reshape->tensor(input());
+ _reshape->shape(_shape);
+ }
+ output()->from(_reshape);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleConst *_shape = nullptr;
+};
+
+class TanhTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _tanh = g()->nodes()->create<luci::CircleTanh>();
+ {
+ _tanh->x(input());
+ }
+ output()->from(_tanh);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleTanh *_tanh = nullptr;
+};
+
+class FloorTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _floor = g()->nodes()->create<luci::CircleFloor>();
+ {
+ _floor->x(input());
+ }
+ output()->from(_floor);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleFloor *_floor = nullptr;
+};
+
+template <Type indexT> class ArgMaxTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {1});
+ // output dtype is float by default, but ArgMax should have indexType (s32/s64)
+ output()->dtype(indexT);
+ _dimension = g()->nodes()->create<luci::CircleConst>();
+ {
+ _dimension->dtype(indexT);
+ }
+ _argmax = g()->nodes()->create<luci::CircleArgMax>();
+ {
+ _argmax->input(input());
+ _argmax->dimension(_dimension);
+ _argmax->output_type(indexT);
+ _argmax->dtype(indexT);
+ }
+ output()->from(_argmax);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleArgMax *_argmax = nullptr;
+ luci::CircleConst *_dimension = nullptr;
+};
+
+class BatchToSpaceNDTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _block_shape = g()->nodes()->create<luci::CircleConst>();
+ {
+ _block_shape->dtype(Type::S32);
+ }
+ _crops = g()->nodes()->create<luci::CircleConst>();
+ {
+ _crops->dtype(Type::S32);
+ }
+ _btos = g()->nodes()->create<luci::CircleBatchToSpaceND>();
+ {
+ _btos->input(input());
+ _btos->block_shape(_block_shape);
+ _btos->crops(_crops);
+ }
+ output()->from(_btos);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleBatchToSpaceND *_btos = nullptr;
+ luci::CircleConst *_block_shape = nullptr;
+ luci::CircleConst *_crops = nullptr;
+};
+
+class DepthToSpaceTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 1, 1, 4}, {1, 2, 2, 1});
+ _dtos = g()->nodes()->create<luci::CircleDepthToSpace>();
+ {
+ _dtos->input(input());
+ _dtos->block_size(2);
+ }
+ output()->from(_dtos);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleDepthToSpace *_dtos = nullptr;
+};
+
+class PadTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _paddings = g()->nodes()->create<luci::CircleConst>();
+ {
+ _paddings->dtype(Type::S32);
+ }
+ _pad = g()->nodes()->create<luci::CirclePad>();
+ {
+ _pad->input(input());
+ _pad->paddings(_paddings);
+ }
+ output()->from(_pad);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CirclePad *_pad = nullptr;
+ luci::CircleConst *_paddings = nullptr;
+};
+
+class TransposeTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _perm = g()->nodes()->create<luci::CircleConst>();
+ {
+ _perm->dtype(Type::S32);
+ }
+ _transpose = g()->nodes()->create<luci::CircleTranspose>();
+ {
+ _transpose->a(input());
+ _transpose->perm(_perm);
+ }
+ output()->from(_transpose);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleTranspose *_transpose = nullptr;
+ luci::CircleConst *_perm = nullptr;
+};
+
+class ConcatenationTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({16}, {32});
+ _param = create_dummy_const<Type::FLOAT32>(g(), {16});
+ _concat = g()->nodes()->create<luci::CircleConcatenation>(2);
+ {
+ _concat->values(0, input());
+ _concat->values(1, _param);
+ _concat->axis(0);
+ }
+ output()->from(_concat);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleConcatenation *_concat = nullptr;
+ luci::CircleConst *_param = nullptr;
+};
+
+// Test graph for comparison Ops
+// GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, EQUAL, NOT_EQUAL
+template <class Op> class ComparisonOpTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ output()->dtype(loco::DataType::BOOL);
+ _y = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _op = g()->nodes()->create<Op>();
+ {
+ _op->x(input());
+ _op->y(_y);
+ _op->dtype(loco::DataType::BOOL);
+ }
+ output()->from(_op);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x(void) const { return _op->x(); }
+ loco::Node *y(void) const { return _op->y(); }
+
+public:
+ Op *_op = nullptr;
+ luci::CircleConst *_y = nullptr;
+};
+
+// Test graph for binary logical Ops
+// LOGICAL_OR, LOGICAL_AND
+template <class Op> class BinaryLogicalOpTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ input()->dtype(loco::DataType::BOOL);
+ output()->dtype(loco::DataType::BOOL);
+ _y = create_dummy_const<Type::BOOL>(g(), {32});
+ _op = g()->nodes()->create<Op>();
+ {
+ _op->x(input());
+ _op->y(_y);
+ _op->dtype(loco::DataType::BOOL);
+ }
+ output()->from(_op);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x(void) const { return _op->x(); }
+ loco::Node *y(void) const { return _op->y(); }
+
+public:
+ Op *_op = nullptr;
+ luci::CircleConst *_y = nullptr;
+};
+
+class DivTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _div = g()->nodes()->create<luci::CircleDiv>();
+ {
+ _div->x(input());
+ _div->y(_const);
+ }
+ output()->from(_div);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _div->x(); }
+
+ loco::Node *y() { return _div->y(); }
+
+private:
+ luci::CircleDiv *_div = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class FloorDivTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _floor_div = g()->nodes()->create<luci::CircleFloorDiv>();
+ {
+ _floor_div->x(input());
+ _floor_div->y(_const);
+ }
+ output()->from(_floor_div);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _floor_div->x(); }
+
+ loco::Node *y() { return _floor_div->y(); }
+
+private:
+ luci::CircleFloorDiv *_floor_div = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class RsqrtTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _rsqrt = g()->nodes()->create<luci::CircleRsqrt>();
+ {
+ _rsqrt->x(input());
+ }
+ output()->from(_rsqrt);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleRsqrt *_rsqrt = nullptr;
+};
+
+class SqrtTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _sqrt = g()->nodes()->create<luci::CircleSqrt>();
+ {
+ _sqrt->x(input());
+ }
+ output()->from(_sqrt);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class EluTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _elu = g()->nodes()->create<luci::CircleElu>();
+ {
+ _elu->features(input());
+ }
+ output()->from(_elu);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ luci::CircleElu *_elu = nullptr;
+};
+
+class PowTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _pow = g()->nodes()->create<luci::CirclePow>();
+ {
+ _pow->x(input());
+ _pow->y(_const);
+ }
+ output()->from(_pow);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _pow->x(); }
+
+ loco::Node *y() { return _pow->y(); }
+
+private:
+ luci::CirclePow *_pow = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class ResizeBilinearTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 4, 4, 1}, {1, 8, 8, 1});
+
+ _size = create_const<Type::S32, int32_t>(g(), {2}, {8, 8});
+ _resize_bilinear = g()->nodes()->create<luci::CircleResizeBilinear>();
+ {
+ _resize_bilinear->input(input());
+ _resize_bilinear->size(_size);
+ }
+ output()->from(_resize_bilinear);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleResizeBilinear *_resize_bilinear = nullptr;
+ luci::CircleConst *_size = nullptr;
+};
+
+} // namespace
+
+// Quantize and verify with given configurations
+#define TEST_WITH_GRAPH(graph, type, granularity) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_NO_THROW(quantize_and_verify(g.g(), type, granularity)); \
+ } while (0)
+
+// Quantize and verify with wrong type
+#define TEST_WITH_WRONG_TYPE(graph, type, granularity, wrong_dtype) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_ANY_THROW(quantize_and_verify_with_wrong_type(&g, type, granularity, wrong_dtype)); \
+ } while (0)
+
+// Quantize and verify with wrong granularity
+#define TEST_WITH_WRONG_GRANULARITY(graph, type, granularity) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_ANY_THROW(quantize_and_verify_with_wrong_granularity(&g, type, granularity)); \
+ } while (0)
+
+// Quantize and verify with wrong granularity
+// Users can specify the test target
+#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity, target) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ auto node = loco::must_cast<luci::CircleNode *>(target); \
+ EXPECT_ANY_THROW(quantize_and_verify_with_wrong_granularity(&g, type, granularity, node)); \
+ } while (0)
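+
+// Illustrative sketch (not one of the actual tests below): the TARGET variant lets a test point
+// at a specific node of the graph under test, e.g. the const operand of a concatenation:
+//
+//   TEST_WITH_WRONG_GRANULARITY_TARGET(ConcatenationTestGraph, Type::U8,
+//                                      Granularity::LayerWise, g._param);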
+
+// Test a local helper function
+TEST(QuantizedModelVerifierTest, LocalCreateDummyConst)
+{
+ loco::Graph g;
+
+ EXPECT_NO_THROW(create_dummy_const<Type::FLOAT32>(&g, {32, 32}));
+}
+
+TEST(QuantizedModelVerifierTest, LocalCreateConst)
+{
+ loco::Graph g;
+ std::initializer_list<float> values = {0.1, 0, -5, 100};
+ luci::CircleConst *node = create_const<Type::FLOAT32, float>(&g, {2, 2}, values);
+
+ uint32_t index = 0;
+ for (auto val : values)
+ {
+ EXPECT_EQ(node->at<Type::FLOAT32>(index++), val);
+ }
+}
+
+TEST(QuantizedModelVerifierTest, InstanceNorm)
+{
+ TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, InstanceNorm_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(InstanceNormTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, InstanceNorm_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Logistic)
+{
+ TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Logistic_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(LogisticTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(LogisticTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(LogisticTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Logistic_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(LogisticTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Softmax)
+{
+ TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Softmax_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SoftmaxTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Softmax_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToBatchND)
+{
+ TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToBatchND_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToBatchND_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToDepth)
+{
+ TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToDepth_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToDepth_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Slice)
+{
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Slice_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise, Type::U8);
+
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Slice_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Split)
+{
+ TEST_WITH_GRAPH(SplitTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SplitTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SplitTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Split_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SplitTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SplitTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SplitTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Split_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SplitTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SplitTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SplitTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, StridedSlice)
+{
+ TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, StridedSlice_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(StridedSliceTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, StridedSlice_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ArgMax)
+{
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ArgMax_wrong_dimension_type_NEG)
+{
+ ArgMaxTestGraph<Type::S32> g;
+ g.init();
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, Type::U8, Granularity::LayerWise);
+ pass.run(g.g());
+
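+  // Corrupt the quantized graph: ArgMax's dimension input must keep an integer dtype, so
+  // forcing it to U8 should make the verifier throw.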
+ g._dimension->dtype(Type::U8);
+
+ luci::QuantizedModelVerifier verifier(Type::U8, Granularity::LayerWise);
+ EXPECT_ANY_THROW(verifier.verify(g.g()));
+}
+
+TEST(QuantizedModelVerifierTest, ArgMax_wrong_input_granularity_NEG)
+{
+ ArgMaxTestGraph<Type::S32> g;
+ g.init();
+
+ luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, Type::U8, Granularity::LayerWise);
+ pass.run(g.g());
+
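+  // Tamper with the input's quantization parameters via the insert_scale_zp helper so they no
+  // longer look like valid layer-wise parameters.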
+ insert_scale_zp(loco::must_cast<luci::CircleNode *>(g._argmax->input()), 1.0, 1);
+
+ luci::QuantizedModelVerifier verifier(Type::U8, Granularity::LayerWise);
+ EXPECT_ANY_THROW(verifier.verify(g.g()));
+}
+
+TEST(QuantizedModelVerifierTest, BatchToSpaceND)
+{
+ TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, BatchToSpaceND_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, BatchToSpaceND_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, DepthToSpace)
+{
+ TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, DepthToSpace_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, DepthToSpace_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Concatenation)
+{
+ TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Concatenation_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ConcatenationTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Concatenation_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, LogicalOr)
+{
+ TEST_WITH_GRAPH(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::LayerWise);
+ TEST_WITH_GRAPH(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::ChannelWise);
+ TEST_WITH_GRAPH(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::S16,
+ Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, LogicalOr_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::LayerWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Reshape)
+{
+ TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Reshape_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ReshapeTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReshapeTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReshapeTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Reshape_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Tanh)
+{
+ TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(TanhTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Tanh_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(TanhTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TanhTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TanhTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Tanh_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(TanhTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(TanhTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(TanhTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pad)
+{
+ TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(PadTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pad_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(PadTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PadTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PadTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pad_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(PadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(PadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(PadTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Transpose)
+{
+ TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Transpose_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(TransposeTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TransposeTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TransposeTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Transpose_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(TransposeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Floor)
+{
+ TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(FloorTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Floor_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(FloorTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Floor_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(FloorTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(FloorTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(FloorTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, GreaterEqual)
+{
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, GreaterEqual_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, GreaterEqual_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Greater)
+{
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Greater_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8, Granularity::LayerWise,
+ Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Greater_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16,
+ Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16,
+ Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, NotEqual)
+{
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, NotEqual_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::LayerWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, NotEqual_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16,
+ Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16,
+ Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Div)
+{
+ TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(DivTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Div_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(DivTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DivTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DivTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Div_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, FloorDiv)
+{
+ TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(FloorDivTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, FloorDiv_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(FloorDivTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorDivTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorDivTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, FloorDiv_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Rsqrt)
+{
+ TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Rsqrt_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(RsqrtTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(RsqrtTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(RsqrtTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Rsqrt_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sqrt)
+{
+ TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sqrt_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SqrtTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SqrtTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SqrtTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sqrt_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Elu)
+{
+ TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(EluTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Elu_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(EluTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(EluTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(EluTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Elu_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(EluTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(EluTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(EluTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pow)
+{
+ TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(PowTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pow_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(PowTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PowTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PowTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pow_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeBilinear)
+{
+ TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeBilinear_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeBilinear_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+#undef TEST_WITH_GRAPH
+#undef TEST_WITH_WRONG_TYPE
+#undef TEST_WITH_WRONG_GRANULARITY
+#undef TEST_WITH_WRONG_GRANULARITY_TARGET
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool remove_redundant_reshape(luci::CircleReshape *node)
+{
+ auto pred_node = dynamic_cast<luci::CircleReshape *>(node->tensor());
+ if (pred_node == nullptr)
+ return false;
+
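+  // Bypass the predecessor Reshape: this Reshape now reads the original tensor directly, and
+  // the intermediate Reshape is left for dead-node cleanup.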
+ node->tensor(pred_node->tensor());
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleReshape_1]
+ * |
+ * [CircleReshape_2]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * / \
+ * [CircleReshape_1] [CircleReshape_2]
+ * |
+ * [CircleNode]
+ **/
+bool RemoveRedundantReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
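+  // Visit every node reachable from the graph outputs and fold Reshape-of-Reshape chains.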
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ if (remove_redundant_reshape(reshape_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveRedundantReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class RemoveRedundantReshape : public ::testing::Test
+{
+public:
+ RemoveRedundantReshape() {}
+
+ void createReshapeConst(luci::CircleReshape *target, const std::vector<int32_t> shape)
+ {
+ auto shape_const = g.nodes()->create<luci::CircleConst>();
+ shape_const->dtype(loco::DataType::S32);
+ shape_const->size<loco::DataType::S32>(shape.size());
+ shape_const->shape_status(luci::ShapeStatus::VALID);
+ shape_const->rank(1);
+ shape_const->dim(0).set(shape.size());
+ for (int32_t i = 0; i < shape.size(); i++)
+ {
+ shape_const->at<loco::DataType::S32>(i) = shape.at(i);
+ }
+ shape_const->name("shape_const");
+ target->shape(shape_const);
+ }
+
+ void buildGraph(const std::initializer_list<uint32_t> base_shape,
+ const std::vector<int32_t> first_shape, const std::vector<int32_t> second_shape)
+ {
+    // Create the graph input.
+ input = g.nodes()->create<luci::CircleInput>();
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->rank(base_shape.size());
+ input->shape(base_shape);
+ input->name("input");
+
+ // Create first reshape.
+ first_reshape = g.nodes()->create<luci::CircleReshape>();
+ first_reshape->tensor(input);
+ first_reshape->name("Reshape");
+ createReshapeConst(first_reshape, first_shape);
+
+ // Create second reshape.
+ second_reshape = g.nodes()->create<luci::CircleReshape>();
+ second_reshape->tensor(first_reshape);
+ second_reshape->name("second_reshape");
+ createReshapeConst(second_reshape, second_shape);
+
+    // Connect the graph output.
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->from(second_reshape);
+ output->name("output");
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleReshape *first_reshape = nullptr;
+ luci::CircleReshape *second_reshape = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(RemoveRedundantReshapePassTest, name)
+{
+ luci::RemoveRedundantReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(RemoveRedundantReshape, simple_case)
+{
+ buildGraph({4, 6}, {-1, 4, 6}, {1, -1, 2, 3});
+ luci::RemoveRedundantReshapePass pass;
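+  // Run the pass to a fixed point (until it reports no further change).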
+ while (pass.run(&g))
+ ;
+ int count = 0;
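+  // Count the Reshape nodes still reachable from the output; only the second one should remain.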
+ for (auto node : loco::active_nodes(loco::output_nodes(&g)))
+ {
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ count++;
+ }
+ }
+ ASSERT_EQ(1, count);
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/RemoveRedundantTransposePass.h"
-
-#include <luci/IR/CircleNodes.h>
-
-namespace
-{
-
-/// @brief Return true if first_perm[second_perm[i]] == i
-bool check_perm(const luci::CircleConst *first_perm, const luci::CircleConst *second_perm)
-{
- assert(first_perm->rank() == 1);
- assert(second_perm->rank() == 1);
- assert(second_perm->size<loco::DataType::S32>() == first_perm->size<loco::DataType::S32>());
- for (int32_t i = 0; i < static_cast<int32_t>(first_perm->size<loco::DataType::S32>()); i++)
- {
- if (first_perm->at<loco::DataType::S32>(second_perm->at<loco::DataType::S32>(i)) != i)
- return false;
- }
- return true;
-}
-
-bool remove_consecutive_transpose_function(luci::CircleNode *node)
-{
- auto target_node = dynamic_cast<luci::CircleTranspose *>(node);
- if (target_node == nullptr)
- return false;
- auto pred_node = dynamic_cast<luci::CircleTranspose *>(target_node->a());
- if (pred_node == nullptr)
- return false;
- if (loco::succs(pred_node).size() != 1)
- return false;
-
- auto pred_perm = dynamic_cast<luci::CircleConst *>(target_node->perm());
- if (pred_perm == nullptr)
- return false;
-
- auto main_perm = dynamic_cast<luci::CircleConst *>(pred_node->perm());
- if (main_perm == nullptr)
- return false;
-
- auto main_node = loco::must_cast<luci::CircleNode *>(pred_node->a());
- if (check_perm(pred_perm, main_perm))
- {
- replace(node).with(main_node);
- }
- else
- {
- auto g = main_perm->graph();
- auto new_const_node = g->nodes()->create<luci::CircleConst>();
-
- new_const_node->dtype(loco::DataType::S32);
- new_const_node->rank(1);
- new_const_node->dim(0) = main_perm->dim(0);
- new_const_node->size<loco::DataType::S32>(main_perm->dim(0).value());
- new_const_node->shape_status(luci::ShapeStatus::VALID);
- for (uint32_t i = 0; i < main_perm->size<loco::DataType::S32>(); i++)
- {
- new_const_node->at<loco::DataType::S32>(i) =
- pred_perm->at<loco::DataType::S32>(main_perm->at<loco::DataType::S32>(i));
- }
- pred_node->perm(new_const_node);
- replace(node).with(pred_node);
- }
- return true;
-}
-
-} // namespace
-
-namespace luci
-{
-/**
- * BEFORE
- * |
- * [CircleNode] [CircleConst]
- * (main_node) (main_perm)
- * \ /
- * [CircleTranspose] [CircleConst]
- * (pred_node) (pred_perm)
- * \ /
- * [CircleTranspose]
- * (target_node)
- * |
- *
- * AFTER
- * <Optional Case>
- *
- * | | |
- * [CircleNode] [CircleConst] |
- * (main_node) (new_const_node) |
- * \ / or [CircleNode]
- * [CircleTranspose] (main_node)
- * (pred_node) |
- * | |
- *
- */
-bool RemoveRedundantTransposePass::run(loco::Graph *g)
-{
- bool changed = false;
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- if (remove_consecutive_transpose_function(circle_node))
- {
- changed = true;
- break;
- }
- }
- return changed;
-}
-
-} // namespace luci
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "luci/Pass/RemoveRedundantTransposePass.h"
-
-#include <luci/IR/CircleNodes.h>
-
-#include <vector>
-
-#include <gtest/gtest.h>
-
-namespace
-{
-
-void setValue(luci::CircleConst *node, const std::vector<int> &v)
-{
- node->dtype(loco::DataType::S32);
- node->size<loco::DataType::S32>(v.size());
- node->rank(1);
- node->dim(0).set(v.size());
- for (int i = 0; i < v.size(); ++i)
- {
- node->at<loco::DataType::S32>(i) = v[i];
- }
-}
-
-/**
- * Type1
- * BEFORE
- * |
- * [CircleNode] [CircleConst]
- * \ /
- * [CircleTranspose] [CircleConst]
- * \ /
- * [CircleTranspose]
- * |
- *
- * AFTER
- * |
- * [CircleNode]
- * | Remove Both
- *
- * --------------------------------------------
- *
- * Type2
- * BEFORE
- * |
- * [CircleNode] [CircleConst]
- * \ /
- * [CircleTranspose] [CircleConst]
- * \ /
- * [CircleTranspose]
- * |
- *
- * AFTER
- * | |
- * [CircleNode] [CircleConst]
- * \ /
- * [CircleTranspose]
- * |
- *
- */
-void create_redundunt_transpose(loco::Graph *g, const std::vector<int32_t> &perm1,
- const std::vector<int32_t> &perm2)
-{
- assert(g);
-
- auto input = g->nodes()->create<luci::CircleInput>();
- auto graph_input = g->inputs()->create();
- input->index(graph_input->index());
-
- // Create perm1
- auto perm1_node = g->nodes()->create<luci::CircleConst>();
- setValue(perm1_node, perm1);
-
- auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
- transpose1->dtype(loco::DataType::FLOAT32);
- transpose1->a(input);
- transpose1->perm(perm1_node);
-
- // Create perm2
- auto perm2_node = g->nodes()->create<luci::CircleConst>();
- setValue(perm2_node, perm2);
-
- auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
- transpose2->dtype(loco::DataType::FLOAT32);
- transpose2->a(transpose1);
- transpose2->perm(perm2_node);
-
- // Output
- auto output = g->nodes()->create<luci::CircleOutput>();
- output->from(transpose2);
- auto graph_output = g->outputs()->create();
- output->index(graph_output->index());
-}
-
-} // namespace
-
-TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type1)
-{
- auto graph = loco::make_graph();
- create_redundunt_transpose(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3});
-
- luci::RemoveRedundantTransposePass pass;
- while (pass.run(graph.get()))
- ;
- luci::CircleTranspose *transpose_node = nullptr;
- for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
- {
- auto trans = dynamic_cast<luci::CircleTranspose *>(node);
- if (not trans)
- continue;
- transpose_node = trans;
- break;
- }
- // No transpose node is in graph.
- ASSERT_EQ(nullptr, transpose_node);
-}
-
-TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2)
-{
- auto graph = loco::make_graph();
- create_redundunt_transpose(graph.get(), {0, 1, 3, 2}, {1, 0, 2, 3});
-
- luci::RemoveRedundantTransposePass pass;
- while (pass.run(graph.get()))
- ;
- luci::CircleTranspose *transpose_node = nullptr;
- for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
- {
- auto trans = dynamic_cast<luci::CircleTranspose *>(node);
- if (not trans)
- continue;
- transpose_node = trans;
- break;
- }
- // Just one transpose node, with updated perm constant.
- ASSERT_NE(nullptr, transpose_node);
- auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
- ASSERT_EQ(1, perm->at<loco::DataType::S32>(0));
- ASSERT_EQ(0, perm->at<loco::DataType::S32>(1));
- ASSERT_EQ(3, perm->at<loco::DataType::S32>(2));
- ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
-}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantTransposePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+/// @brief Return true if first_perm[second_perm[i]] == i
+bool check_perm(const luci::CircleConst *first_perm, const luci::CircleConst *second_perm)
+{
+ assert(first_perm->rank() == 1);
+ assert(second_perm->rank() == 1);
+ assert(second_perm->size<loco::DataType::S32>() == first_perm->size<loco::DataType::S32>());
+ for (int32_t i = 0; i < static_cast<int32_t>(first_perm->size<loco::DataType::S32>()); i++)
+ {
+ if (first_perm->at<loco::DataType::S32>(second_perm->at<loco::DataType::S32>(i)) != i)
+ return false;
+ }
+ return true;
+}
+
+bool remove_consecutive_transpose_function(luci::CircleTranspose *target_node)
+{
+ auto pred_node = dynamic_cast<luci::CircleTranspose *>(target_node->a());
+ if (pred_node == nullptr)
+ return false;
+
+ auto target_perm = dynamic_cast<luci::CircleConst *>(target_node->perm());
+ if (target_perm == nullptr)
+ return false;
+
+ auto pred_perm = dynamic_cast<luci::CircleConst *>(pred_node->perm());
+ if (pred_perm == nullptr)
+ return false;
+
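+  // Compose the two permutations: if they cancel to the identity, both Transposes are dropped;
+  // otherwise they are fused into a single Transpose that applies the composed permutation.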
+ auto main_node = loco::must_cast<luci::CircleNode *>(pred_node->a());
+ if (check_perm(target_perm, pred_perm))
+ {
+ replace(target_node).with(main_node);
+ }
+ else
+ {
+ auto name = target_node->name();
+ assert(name.length() > 0);
+
+ auto g = pred_perm->graph();
+ auto new_const_node = g->nodes()->create<luci::CircleConst>();
+
+ new_const_node->dtype(loco::DataType::S32);
+ new_const_node->rank(1);
+ new_const_node->dim(0) = pred_perm->dim(0);
+ new_const_node->size<loco::DataType::S32>(pred_perm->dim(0).value());
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t i = 0; i < pred_perm->size<loco::DataType::S32>(); i++)
+ {
+ new_const_node->at<loco::DataType::S32>(i) =
+ target_perm->at<loco::DataType::S32>(pred_perm->at<loco::DataType::S32>(i));
+ }
+ new_const_node->name(name + "/Transpose/perm");
+
+    // Create a new Transpose node that applies the composed permutation
+ auto new_transpose_node = g->nodes()->create<luci::CircleTranspose>();
+ new_transpose_node->dtype(target_node->dtype());
+ new_transpose_node->a(main_node);
+ new_transpose_node->perm(new_const_node);
+ new_transpose_node->name(name + "/Transpose");
+ luci::add_origin(new_transpose_node, luci::get_origin(target_node));
+
+ replace(target_node).with(new_transpose_node);
+ }
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * | (pred_perm)
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * (pred_node) (target_perm)
+ * \ /
+ * [CircleTranspose]
+ * (target_node)
+ * |
+ *
+ * AFTER
+ * | |
+ * [CircleNode] [CircleConst](new) |
+ * \ / or [CircleNode]
+ * [CircleTranspose](new) |
+ * | |
+ */
+bool RemoveRedundantTransposePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto transpose = dynamic_cast<luci::CircleTranspose *>(node))
+ {
+ if (remove_consecutive_transpose_function(transpose))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveRedundantTransposePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void setValue(luci::CircleConst *node, const std::vector<int> &v)
+{
+ node->dtype(loco::DataType::S32);
+ node->size<loco::DataType::S32>(v.size());
+ node->rank(1);
+ node->dim(0).set(v.size());
+ for (int i = 0; i < v.size(); ++i)
+ {
+ node->at<loco::DataType::S32>(i) = v[i];
+ }
+}
+
+/**
+ * Remove consecutive Transposes
+ *
+ * Type 1: Remove both Transposes
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode]
+ * |
+ *
+ * --------------------------------------------
+ *
+ * Type 2: Merge into one Transpose
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ */
+void create_redundant_transpose(loco::Graph *g, const std::vector<int32_t> &perm1,
+ const std::vector<int32_t> &perm2)
+{
+ assert(g);
+
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto graph_input = g->inputs()->create();
+ input->index(graph_input->index());
+ input->name("input");
+
+ // Create perm1
+ auto perm1_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm1_node, perm1);
+ perm1_node->name("perm1_node");
+
+ auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
+ transpose1->dtype(loco::DataType::FLOAT32);
+ transpose1->a(input);
+ transpose1->perm(perm1_node);
+ transpose1->name("transpose1");
+
+ // Create perm2
+ auto perm2_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm2_node, perm2);
+ perm2_node->name("perm2_node");
+
+ auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
+ transpose2->dtype(loco::DataType::FLOAT32);
+ transpose2->a(transpose1);
+ transpose2->perm(perm2_node);
+ transpose2->name("transpose2");
+
+ // Output
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(transpose2);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ output->name("output");
+}
+
+/**
+ * Remove consecutive Transposes with branching
+ *
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleConst] [CircleTranspose] [CircleConst]
+ * \ / \ /
+ * [CircleTranspose] [CircleTranspose]
+ * | |
+ * [CircleNode] [CircleNode]
+ * | |
+ *
+ * AFTER
+ *  Type 1: Remove all Transposes
+ * |
+ * [CircleNode]
+ * / \
+ * [CircleNode] [CircleNode]
+ * | |
+ *
+ *  Type 2: Remove both Transposes on one side and create a new one for the other side
+ * |
+ * [CircleNode] [CircleConst](new)
+ * / \ /
+ * / [CircleTranspose](new)
+ * | |
+ * [CircleNode] [CircleNode]
+ * | |
+ */
+void create_redundant_transpose_with_branch(loco::Graph *g, const std::vector<int32_t> &perm1,
+ const std::vector<int32_t> &perm2,
+ const std::vector<int32_t> &perm3)
+{
+ assert(g);
+
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto graph_input = g->inputs()->create();
+ input->dtype(loco::DataType::FLOAT32);
+ input->index(graph_input->index());
+ input->name("input");
+ graph_input->dtype(loco::DataType::FLOAT32);
+
+ graph_input->shape({4, 4, 4, 4});
+ input->shape({4, 4, 4, 4});
+
+ // Create perm1
+ auto perm1_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm1_node, perm1);
+ perm1_node->name("perm1_node");
+
+ auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
+ transpose1->dtype(loco::DataType::FLOAT32);
+ transpose1->a(input);
+ transpose1->perm(perm1_node);
+ transpose1->name("transpose1");
+
+ // Create perm2
+ auto perm2_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm2_node, perm2);
+ perm2_node->name("perm2_node");
+
+ auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
+ transpose2->dtype(loco::DataType::FLOAT32);
+ transpose2->a(transpose1);
+ transpose2->perm(perm2_node);
+ transpose2->name("transpose2");
+
+ // create perm3
+ auto perm3_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm3_node, perm3);
+ perm3_node->name("perm3_node");
+
+ auto transpose3 = g->nodes()->create<luci::CircleTranspose>();
+ transpose3->dtype(loco::DataType::FLOAT32);
+ transpose3->a(transpose1);
+ transpose3->perm(perm3_node);
+ transpose3->name("transpose3");
+
+ // Output
+ auto output1 = g->nodes()->create<luci::CircleOutput>();
+ output1->from(transpose2);
+ output1->name("output1");
+ auto output2 = g->nodes()->create<luci::CircleOutput>();
+ output2->from(transpose3);
+ output2->name("output2");
+ auto graph_output1 = g->outputs()->create();
+ output1->index(graph_output1->index());
+ auto graph_output2 = g->outputs()->create();
+ output2->index(graph_output2->index());
+ output1->dtype(loco::DataType::FLOAT32);
+ output2->dtype(loco::DataType::FLOAT32);
+ graph_output1->dtype(loco::DataType::FLOAT32);
+ graph_output2->dtype(loco::DataType::FLOAT32);
+ output1->shape({4, 4, 4, 4});
+ output2->shape({4, 4, 4, 4});
+ graph_output1->shape({4, 4, 4, 4});
+ graph_output2->shape({4, 4, 4, 4});
+}
+
+} // namespace
+
+TEST(RemoveRedundantTransposePassTest, name)
+{
+ luci::RemoveRedundantTransposePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type1)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // No transpose node is in graph.
+ ASSERT_EQ(nullptr, transpose_node);
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose(graph.get(), {0, 1, 3, 2}, {1, 0, 2, 3});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // Just one transpose node, with updated perm constant.
+ ASSERT_NE(nullptr, transpose_node);
+ auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
+ ASSERT_EQ(1, perm->at<loco::DataType::S32>(0));
+ ASSERT_EQ(0, perm->at<loco::DataType::S32>(1));
+ ASSERT_EQ(3, perm->at<loco::DataType::S32>(2));
+ ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
+}
+
+/**
+ * @brief Test case where the output of the first Transpose feeds more than one operation.
+ */
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_with_branch_remove_case)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose_with_branch(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3}, {1, 0, 2, 3});
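+  // All three permutations swap the first two axes and are self-inverse, so both branches
+  // cancel completely and no Transpose should survive.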
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // No transpose node is in graph.
+ ASSERT_EQ(nullptr, transpose_node);
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_with_branch_leave_one)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose_with_branch(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2});
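+  // One branch cancels completely; the other is fused into a single Transpose whose composed
+  // permutation is {1, 0, 3, 2}, as asserted below.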
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ ASSERT_NE(nullptr, transpose_node);
+ auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
+ ASSERT_EQ(1, perm->at<loco::DataType::S32>(0));
+ ASSERT_EQ(0, perm->at<loco::DataType::S32>(1));
+ ASSERT_EQ(3, perm->at<loco::DataType::S32>(2));
+ ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool remove_no_effect_reshape(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CircleReshape *>(node);
+ if (target_node == nullptr)
+ return false;
+
+ auto new_shape = dynamic_cast<luci::CircleConst *>(target_node->shape());
+ if (new_shape == nullptr)
+ return false;
+
+ // Compare updated shape and input shape.
+ auto input_node = loco::must_cast<luci::CircleNode *>(target_node->tensor());
+ if (input_node->rank() != new_shape->dim(0).value())
+ return false;
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+    // If the updated dim is -1, its value is inferred, so skip the comparison
+    // TODO Check that the updated shape contains -1 at most once
+ if (new_shape->at<loco::DataType::S32>(i) == -1)
+ continue;
+    // If the input shape is dynamic, this Reshape cannot be removed.
+ if (!input_node->dim(i).known())
+ return false;
+    // If the input shape and the updated shape differ, it cannot be removed either.
+ if (input_node->dim(i).value() != static_cast<uint32_t>(new_shape->at<loco::DataType::S32>(i)))
+ return false;
+ }
+
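+  // Every dimension matches (or is a wildcard -1), so this Reshape is a no-op: forward its
+  // users directly to the input tensor.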
+ replace(target_node).with(input_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool RemoveUnnecessaryReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (remove_no_effect_reshape(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class ReshapeGraphlet
+{
+public:
+ ReshapeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape, bool remove)
+ {
+ std::vector<uint32_t> shape_vector{input_shape};
+
+ auto dim0_val = remove ? shape_vector.size() : 1;
+ _reshape_shape = g->nodes()->create<luci::CircleConst>();
+ _reshape_shape->rank(1);
+ _reshape_shape->dim(0).set(dim0_val);
+ _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+ _reshape_shape->dtype(loco::DataType::S32);
+
+ _reshape_shape->size<loco::DataType::S32>(dim0_val);
+ for (uint32_t i = 0; i < dim0_val; i++)
+ {
+ if (remove)
+ _reshape_shape->at<loco::DataType::S32>(i) = static_cast<int32_t>(shape_vector.at(i));
+ else
+ _reshape_shape->at<loco::DataType::S32>(i) = -1;
+ }
+ _reshape_shape->name("reshape_shape");
+
+ // Reshape create
+ auto newshape_rank = remove ? shape_vector.size() : 1;
+ _reshape = g->nodes()->create<luci::CircleReshape>();
+ _reshape->newShape()->rank(newshape_rank);
+ for (uint32_t i = 0; i < newshape_rank; i++)
+ {
+ if (remove)
+ _reshape->newShape()->dim(i) = static_cast<int32_t>(shape_vector.at(i));
+ else
+ _reshape->newShape()->dim(i) = -1;
+ }
+ _reshape->name("reshape");
+ }
+
+protected:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleConst *_reshape_shape = nullptr;
+};
+
+class ReshapeGraph : public TestIOGraph, public ReshapeGraphlet
+{
+public:
+ ReshapeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape, bool remove)
+ {
+ TestIOGraph::init(shape, shape);
+ ReshapeGraphlet::init(g(), shape, remove);
+
+ // connect graph
+ _reshape->tensor(input());
+ _reshape->shape(_reshape_shape);
+
+ output()->from(_reshape);
+ }
+};
+
+// TODO use ::testing::Test
+
+} // namespace
+
+TEST(RemoveUnnecessaryReshapePassTest, name)
+{
+ luci::RemoveUnnecessaryReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveUnnecessaryReshapePass, removed)
+{
+ ReshapeGraph g;
+
+ g.init({1, 2, 3, 4}, true);
+
+ // confirm graph has Reshape
+ auto reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_NE(nullptr, reshape_node);
+ luci::RemoveUnnecessaryReshapePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Reshape is removed
+ reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_EQ(nullptr, reshape_node);
+}
+
+TEST(RemoveUnnecessaryReshapePass, not_removed_NEG)
+{
+ ReshapeGraph g;
+
+ g.init({1, 2, 3, 4}, false);
+
+ // confirm graph has Reshape
+ auto reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_NE(nullptr, reshape_node);
+ luci::RemoveUnnecessaryReshapePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Reshape is NOT removed
+ reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_NE(nullptr, reshape_node);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessarySlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * @brief Return value in CircleConst.
+ * @details Return value in position on CircleConst with int64 format.
+ * Begin must be larger than or equal to 0. Size must be larger
+ * than or equal to -1.
+ */
+int64_t value_from_circle_const(const luci::CircleConst *node, uint32_t idx)
+{
+ assert(node->rank() == 1 && node->dim(0).value() > idx);
+ assert(node->dtype() == loco::DataType::S64 || node->dtype() == loco::DataType::S32);
+
+ if (node->dtype() == loco::DataType::S64)
+ return node->at<loco::DataType::S64>(idx);
+ return static_cast<int64_t>(node->at<loco::DataType::S32>(idx));
+}
+
+bool remove_no_effect_slice(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CircleSlice *>(node);
+ if (target_node == nullptr)
+ return false;
+
+ auto begin_const = dynamic_cast<luci::CircleConst *>(target_node->begin());
+ if (begin_const == nullptr)
+ return false;
+
+ auto size_const = dynamic_cast<luci::CircleConst *>(target_node->size());
+ if (size_const == nullptr)
+ return false;
+
+  // Check that begin/size cover the whole input shape.
+ auto input_node = loco::must_cast<luci::CircleNode *>(target_node->input());
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+ if (value_from_circle_const(begin_const, i) != 0)
+ return false;
+
+    int64_t size_value = value_from_circle_const(size_const, i);
+    if (size_value == -1)
+      continue;
+
+    // The dimension must be known before its value can be compared with the size.
+    if (!input_node->dim(i).known())
+      return false;
+    if (size_value != static_cast<int64_t>(input_node->dim(i).value()))
+      return false;
+ }
+ replace(target_node).with(input_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleSlice]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleNode]
+ *
+ * Slice OP has no effect if,
+ * 1. Static Shape : begin_const[idx] is 0 AND size_const[idx] is (-1 OR input_dimension[idx])
+ * 2. Dynamic Shape : begin_const[idx] is 0 AND size_const[idx] is -1
+ */
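+/**
+ * @note Illustrative example (not taken from the original sources): for an input of static
+ *       shape [2, 4, 3], begin = [0, 0, 0] with size = [2, -1, 3] selects the whole tensor,
+ *       so the Slice is removed; begin = [0, 1, 0] or size = [2, 2, 3] selects a sub-tensor,
+ *       so the Slice is kept.
+ */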
+bool RemoveUnnecessarySlicePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (remove_no_effect_slice(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveUnnecessarySlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SliceGraphlet
+{
+public:
+ SliceGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape, bool remove)
+ {
+ // Begin Create.
+ _begin = g->nodes()->create<luci::CircleConst>();
+ _begin->rank(1);
+ _begin->dim(0).set(input_shape.size());
+ _begin->shape_status(luci::ShapeStatus::VALID);
+ _begin->dtype(loco::DataType::S32);
+ _begin->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ _begin->at<loco::DataType::S32>(i) = remove ? 0 : 1;
+ _begin->name("begin");
+
+ // Size Create.
+ _size = g->nodes()->create<luci::CircleConst>();
+ _size->rank(1);
+ _size->dim(0).set(input_shape.size());
+ _size->shape_status(luci::ShapeStatus::VALID);
+ _size->dtype(loco::DataType::S32);
+ _size->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ _size->at<loco::DataType::S32>(i) = -1;
+ _size->name("size");
+
+ // Slice Node create.
+ _slice = g->nodes()->create<luci::CircleSlice>();
+ _slice->dtype(loco::DataType::S32);
+ _slice->name("slice");
+ }
+
+protected:
+ luci::CircleSlice *_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_size = nullptr;
+};
+
+class SliceGraph : public TestIOGraph, public SliceGraphlet
+{
+public:
+ SliceGraph() = default;
+
+public:
+ void init(const ShapeU32 shape, bool remove)
+ {
+ TestIOGraph::init(shape, shape);
+ SliceGraphlet::init(g(), shape, remove);
+
+ _slice->input(input());
+ _slice->begin(_begin);
+ _slice->size(_size);
+
+ output()->from(_slice);
+ }
+};
+
+} // namespace
+
+TEST(RemoveUnnecessarySlicePass, name)
+{
+ luci::RemoveUnnecessarySlicePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveUnnecessarySlicePass, removed)
+{
+ SliceGraph g;
+
+ g.init({2, 4, 2, 3}, true);
+
+ // confirm graph has Slice
+ auto slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_NE(nullptr, slice_node);
+ luci::RemoveUnnecessarySlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Slice is removed
+ slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_EQ(nullptr, slice_node);
+}
+
+TEST(RemoveUnnecessarySlicePass, not_removed_NEG)
+{
+ SliceGraph g;
+
+ g.init({2, 4, 2, 3}, false);
+
+ // confirm graph has Slice
+ auto slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_NE(nullptr, slice_node);
+ luci::RemoveUnnecessarySlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Slice is NOT removed
+ slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_NE(nullptr, slice_node);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessarySplitPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
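+/**
+ * @note Summary inferred from the code below: a Split whose num_split is 1 produces a single
+ *       CircleSplitOut that is identical to the Split's input, so the SplitOut is rewired
+ *       directly to that input and the Split becomes dead.
+ */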
+bool remove_unnecessary_split(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CircleSplitOut *>(node);
+ if (target_node == nullptr)
+ return false;
+
+ auto split_node = dynamic_cast<luci::CircleSplit *>(target_node->input());
+ if (split_node == nullptr)
+ return false;
+
+ if (loco::succs(split_node).size() != 1)
+ return false;
+
+ if (split_node->num_split() == 1)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(split_node->input());
+ replace(target_node).with(input_node);
+ return true;
+ }
+ return false;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool RemoveUnnecessarySplitPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (remove_unnecessary_split(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessarySplitPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SplitGraphlet
+{
+public:
+ SplitGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, uint32_t nout)
+ {
+ assert(nout == 1 || nout == 2);
+
+ _dim = g->nodes()->create<luci::CircleConst>();
+ set_shape_vector(_dim, {0});
+ _dim->name("dim");
+
+ _split = g->nodes()->create<luci::CircleSplit>();
+ _split->num_split(nout);
+ _split->name("split");
+
+ _split_out_0 = g->nodes()->create<luci::CircleSplitOut>();
+ _split_out_0->index(0);
+ _split_out_0->name("split_out_0");
+
+ if (nout == 2)
+ {
+ _split_out_1 = g->nodes()->create<luci::CircleSplitOut>();
+ _split_out_1->index(1);
+ _split_out_1->name("split_out_1");
+ }
+ }
+
+protected:
+ luci::CircleSplit *_split = nullptr;
+ luci::CircleConst *_dim = nullptr;
+ luci::CircleSplitOut *_split_out_0 = nullptr;
+ luci::CircleSplitOut *_split_out_1 = nullptr;
+};
+
+class SplitOneGraph : public TestIGraphlet, public TestOGraphlet, public SplitGraphlet
+{
+public:
+ SplitOneGraph() = default;
+
+public:
+ void init()
+ {
+ TestIGraphlet::init(g(), {1});
+ TestOGraphlet::init(g(), {1});
+ SplitGraphlet::init(g(), 1);
+
+ _split->input(input());
+ _split->split_dim(_dim);
+ _split_out_0->input(_split);
+
+ output()->from(_split_out_0);
+ }
+};
+
+class SplitTwoGraph : public TestIGraphlet, public TestOsGraphlet<2>, public SplitGraphlet
+{
+public:
+ SplitTwoGraph() = default;
+
+public:
+ void init()
+ {
+ TestIGraphlet::init(g(), {1});
+ TestOsGraphlet<2>::init(g(), {{1}, {1}});
+ SplitGraphlet::init(g(), 2);
+
+ _split->input(input());
+ _split->split_dim(_dim);
+ _split_out_0->input(_split);
+ _split_out_1->input(_split);
+
+ output(0)->from(_split_out_0);
+ output(1)->from(_split_out_1);
+ }
+};
+
+// TODO use ::testing::Test
+
+} // namespace
+
+TEST(RemoveUnnecessarySplitPass, name)
+{
+ luci::RemoveUnnecessarySplitPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveUnnecessarySplitPass, create_unnecessary_split)
+{
+ SplitOneGraph g;
+
+ g.init();
+
+ luci::RemoveUnnecessarySplitPass pass;
+ while (pass.run(g.g()))
+ ;
+
+ auto split_node = luci::test::first_node<luci::CircleSplit>(g.g());
+  // No Split node remains in the graph.
+ ASSERT_EQ(nullptr, split_node);
+}
+
+TEST(RemoveUnnecessarySplitPass, create_unnecessary_split_NEG)
+{
+ SplitTwoGraph g;
+
+ g.init();
+
+ luci::RemoveUnnecessarySplitPass pass;
+ while (pass.run(g.g()))
+ ;
+
+ auto split_node = luci::test::first_node<luci::CircleSplit>(g.g());
+  // The Split node remains in the graph.
+ ASSERT_NE(nullptr, split_node);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * @brief Return value in CircleConst.
+ * @details Return value in position on CircleConst with int64 format.
+ */
+int64_t value_from_circle_const(const luci::CircleConst *node, uint32_t idx)
+{
+ assert(node->rank() == 1 && node->dim(0).value() > idx);
+ assert(node->dtype() == loco::DataType::S64 || node->dtype() == loco::DataType::S32);
+
+ if (node->dtype() == loco::DataType::S64)
+ return node->at<loco::DataType::S64>(idx);
+ return static_cast<int64_t>(node->at<loco::DataType::S32>(idx));
+}
+
+bool remove_no_effect_strided_slice(luci::CircleStridedSlice *target_node)
+{
+ auto begin_const = dynamic_cast<luci::CircleConst *>(target_node->begin());
+ if (begin_const == nullptr)
+ return false;
+
+ auto strides_const = dynamic_cast<luci::CircleConst *>(target_node->strides());
+ if (strides_const == nullptr)
+ return false;
+
+ auto end_const = dynamic_cast<luci::CircleConst *>(target_node->end());
+ if (end_const == nullptr)
+ return false;
+
+ auto input_node = loco::must_cast<luci::CircleNode *>(target_node->input());
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+ if (value_from_circle_const(begin_const, i) != 0)
+ return false;
+
+ int64_t strides_value = value_from_circle_const(strides_const, i);
+ if (strides_value != 1)
+ return false;
+
+    int64_t end_value = value_from_circle_const(end_const, i);
+    if (end_value == -1)
+      continue;
+
+    // The dimension must be known before its value can be compared with the end.
+    if (!input_node->dim(i).known())
+      return false;
+
+    if (end_value != static_cast<int64_t>(input_node->dim(i).value()))
+      return false;
+ }
+
+  /**
+   * Check that the additional mask attributes are zero after the shape checks,
+   * so that StridedSlice operators this pass cannot handle are skipped.
+   */
+ if (target_node->new_axis_mask() != 0 || target_node->shrink_axis_mask() != 0)
+ return false;
+
+ replace(target_node).with(input_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleStridedSlice]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleNode] [CircleStridedSlice]
+ *
+ * StridedSlice OP has no effect if, for every index idx,
+ *    1. Static Shape  : begin_const[idx] is 0 AND strides_const[idx] is 1 AND
+ *                       end_const[idx] is (-1 OR input_dimension[idx])
+ *    2. Dynamic Shape : begin_const[idx] is 0 AND strides_const[idx] is 1 AND
+ *                       end_const[idx] is -1
+ *
+ * In addition, new_axis_mask and shrink_axis_mask must both be 0.
+ */
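+/**
+ * @note Illustrative example (not taken from the original sources): for an input of shape
+ *       [2, 4, 2, 3], begin = [0, 0, 0, 0], strides = [1, 1, 1, 1] and end = [2, 4, 2, 3]
+ *       (or all -1) cover the whole tensor, so the StridedSlice is removed; a non-zero
+ *       begin, a stride other than 1, or a smaller end keeps it.
+ */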
+bool RemoveUnnecessaryStridedSlicePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<luci::CircleStridedSlice *>(node);
+ if (target_node != nullptr)
+ if (remove_no_effect_strided_slice(target_node))
+ changed = true;
+ }
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class StridedSliceGraphlet
+{
+public:
+ StridedSliceGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape, bool remove)
+ {
+ // Begin create
+ _begin = g->nodes()->create<luci::CircleConst>();
+ _begin->rank(1);
+ _begin->dim(0).set(input_shape.size());
+ _begin->shape_status(luci::ShapeStatus::VALID);
+ _begin->dtype(loco::DataType::S32);
+ _begin->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ {
+ _begin->at<loco::DataType::S32>(i) = remove ? 0 : 1;
+ }
+
+ // Strides create
+ _strides = g->nodes()->create<luci::CircleConst>();
+ _strides->rank(1);
+ _strides->dim(0).set(input_shape.size());
+ _strides->shape_status(luci::ShapeStatus::VALID);
+ _strides->dtype(loco::DataType::S32);
+ _strides->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ {
+ _strides->at<loco::DataType::S32>(i) = remove ? 1 : -1;
+ }
+
+ std::vector<uint32_t> shape_vector{input_shape};
+
+ _end = g->nodes()->create<luci::CircleConst>();
+ _end->rank(1);
+ _end->dim(0).set(input_shape.size());
+ _end->shape_status(luci::ShapeStatus::VALID);
+ _end->dtype(loco::DataType::S32);
+ _end->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ {
+ if (remove)
+ _end->at<loco::DataType::S32>(i) = static_cast<int32_t>(shape_vector.at(i));
+ else
+ _end->at<loco::DataType::S32>(i) = -1;
+ }
+
+ // StridedSlice Node create
+ _strided_slice = g->nodes()->create<luci::CircleStridedSlice>();
+ _strided_slice->dtype(loco::DataType::S32);
+ }
+
+protected:
+ luci::CircleStridedSlice *_strided_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_strides = nullptr;
+ luci::CircleConst *_end = nullptr;
+};
+
+class StridedSliceGraph : public TestIOGraph, public StridedSliceGraphlet
+{
+public:
+ StridedSliceGraph() = default;
+
+public:
+ void init(const ShapeU32 shape, bool remove)
+ {
+ TestIOGraph::init(shape, shape);
+ StridedSliceGraphlet::init(g(), shape, remove);
+
+ _strided_slice->input(input());
+ _strided_slice->begin(_begin);
+ _strided_slice->strides(_strides);
+ _strided_slice->end(_end);
+
+ output()->from(_strided_slice);
+ }
+};
+
+} // namespace
+
+TEST(RemoveUnnecessaryStridedSlicePass, basic_case)
+{
+ StridedSliceGraph g;
+
+ g.init({2, 4, 2, 3}, true);
+
+ auto strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_NE(nullptr, strided_slice_node);
+ luci::RemoveUnnecessaryStridedSlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_EQ(nullptr, strided_slice_node);
+}
+
+TEST(RemoveUnnecessaryStridedSlicePass, basic_fail_case_NEG)
+{
+ StridedSliceGraph g;
+
+ g.init({2, 4, 2, 3}, false);
+
+ auto strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_NE(nullptr, strided_slice_node);
+ luci::RemoveUnnecessaryStridedSlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_NE(nullptr, strided_slice_node);
+}
#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
+#include "BatchNormPatternFinder.h"
+
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
assert(gamma->rank() == 1);
auto channel_size = gamma->dim(0).value();
+ auto name = gamma->name();
+ assert(name.length() > 0);
+
// Channel-wise MUL is the same as DEPTHWISE_CONV2D with filter shape (1,1,1,channel_size)
auto weights = gamma->graph()->nodes()->create<luci::CircleConst>();
weights->dtype(loco::DataType::FLOAT32);
{
weights->at<loco::DataType::FLOAT32>(i) = gamma->at<loco::DataType::FLOAT32>(i);
}
+ weights->name(name + "_weights");
return weights;
}
assert(beta->rank() == 1);
auto channel_size = beta->dim(0).value();
+ auto name = beta->name();
+ assert(name.length() > 0);
+
// Channel-wise ADD is the same as bias (shape = (channel_size)) of DEPTHWISE_CONV2D
auto bias = beta->graph()->nodes()->create<luci::CircleConst>();
bias->dtype(loco::DataType::FLOAT32);
{
bias->at<loco::DataType::FLOAT32>(i) = beta->at<loco::DataType::FLOAT32>(i);
}
+ bias->name(name + "_bias");
return bias;
}
-bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta)
-{
- auto x = loco::must_cast<luci::CircleNode *>(add->x());
- auto y = loco::must_cast<luci::CircleNode *>(add->y());
-
- luci::CircleMul *pred = nullptr;
- luci::CircleConst *constant = nullptr;
-
- if (x->opcode() == luci::CircleOpcode::CIRCLECONST && y->opcode() == luci::CircleOpcode::MUL)
- {
- pred = loco::must_cast<luci::CircleMul *>(y);
- constant = loco::must_cast<luci::CircleConst *>(x);
- }
- else if (x->opcode() == luci::CircleOpcode::MUL && y->opcode() == luci::CircleOpcode::CIRCLECONST)
- {
- pred = loco::must_cast<luci::CircleMul *>(x);
- constant = loco::must_cast<luci::CircleConst *>(y);
- }
- else
- {
- return false;
- }
-
- if (constant->rank() != 1)
- return false;
-
- auto channel_dim = constant->dim(0);
- // Assumption: Layout is channel-last
- if (!(channel_dim == add->dim(add->rank() - 1)))
- return false;
-
- mul = pred;
- beta = constant;
- return true;
-}
-
-// Check if mul is batchnorm mul
-bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
- luci::CircleConst *&gamma)
-{
- auto x = dynamic_cast<luci::CircleConst *>(mul->x());
- auto y = dynamic_cast<luci::CircleConst *>(mul->y());
-
- luci::CircleNode *pred = nullptr;
- luci::CircleConst *constant = nullptr;
-
- if (x != nullptr && y == nullptr)
- {
- pred = loco::must_cast<luci::CircleNode *>(mul->y());
- constant = x;
- }
- else if (x == nullptr && y != nullptr)
- {
- pred = loco::must_cast<luci::CircleNode *>(mul->x());
- constant = y;
- }
- else
- {
- return false;
- }
-
- if (constant->rank() != 1)
- return false;
-
- auto channel_dim = constant->dim(0);
- if (!(channel_dim == mul->dim(mul->rank() - 1)))
- return false;
-
- pred_node = pred;
- gamma = constant;
- return true;
-}
-
/**
* Replace channel-wise Mul/Add with DepthwiseConv2D
*
auto weights = create_weights_from_gamma(gamma);
auto bias = create_bias_from_beta(beta);
+ auto name = add->name();
+ assert(name.length() > 0);
+
auto dwconv = add->graph()->nodes()->create<luci::CircleDepthwiseConv2D>();
dwconv->input(pred_node);
dwconv->filter(weights);
dwconv->dilation()->w(1);
dwconv->dilation()->h(1);
dwconv->fusedActivationFunction(add->fusedActivationFunction());
+ dwconv->name(name + "/DepthwiseConv2D");
+ luci::add_origin(dwconv, luci::composite_origin({luci::get_origin(mul), luci::get_origin(add)}));
loco::replace(add).with(dwconv);
return true;
bool changed = false;
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- auto add = dynamic_cast<luci::CircleAdd *>(node);
- if (not add)
- continue;
-
- if (replace_mul_add_with_dwconv(add))
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
{
- changed = true;
- break;
+ if (replace_mul_add_with_dwconv(add))
+ changed = true;
}
}
add->x(mul);
add->y(beta);
output->from(add);
+
+ input->name("input");
+ mul->name("mul");
+ gamma->name("gamma");
+ add->name("add");
+ beta->name("beta");
+ output->name("output");
}
public:
} // namespace
+TEST(ReplaceMulAddWithDepthwiseConv, name)
+{
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(ReplaceMulAddWithDepthwiseConv, simple)
{
SimpleGraph g;
struct RequantizeNonConst final : public luci::CircleNodeMutableVisitor<bool>
{
RequantizeNonConst(loco::DataType input, loco::DataType output)
- : _input_type(input), _output_type(output)
+ : _input_type(input), _output_type(output)
{
}
struct RequantizeConst final : public luci::CircleNodeMutableVisitor<bool>
{
RequantizeConst(loco::DataType input, loco::DataType output)
- : _input_type(input), _output_type(output)
+ : _input_type(input), _output_type(output)
{
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RequantizePass.h"
+
+#include <gtest/gtest.h>
+
+TEST(RequantizePassTest, name)
+{
+ luci::RequantizePass pass(loco::DataType::FLOAT32, loco::DataType::U8);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
#include <luci/IR/CircleNodes.h>
#include <luci/IR/AttrFusedActFunc.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
auto input = loco::must_cast<const luci::CircleCustomOut *>(addv2->inputs(broadcastTo_idx));
auto broadcastTo = loco::must_cast<luci::CircleCustom *>(input->input());
+ auto name = addv2->name();
+ assert(name.length() > 0);
+
auto add = addv2->graph()->nodes()->create<luci::CircleAdd>();
add->fusedActivationFunction(luci::FusedActFunc::NONE);
add->x(addv2->inputs(1 - broadcastTo_idx));
add->y(broadcastTo->inputs(0));
+ add->name(name + "/Add");
+ luci::add_origin(
+ add, luci::composite_origin({luci::get_origin(broadcastTo), luci::get_origin(addv2)}));
+
auto customOut = loco::succs(addv2);
assert(customOut.size() == 1);
replace(*customOut.begin()).with(add);
if (custom_code != "AddV2")
return false;
+ if (addv2->numInputs() != 2)
+ return false;
+
+  // check if the inputs have supported data types
+ for (uint32_t i = 0; i < addv2->numInputs(); i++)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(addv2->inputs(i));
+ switch (input->dtype())
+ {
+ case loco::DataType::U8:
+ case loco::DataType::S8:
+ case loco::DataType::S16:
+ case loco::DataType::S32:
+ case loco::DataType::FLOAT32:
+ break;
+ default:
+ return false;
+ }
+ }
+
if (resolve_with_BroadcastTo(addv2))
return true;
+ auto name = addv2->name();
+ assert(name.length() > 0);
+
auto add = addv2->graph()->nodes()->create<luci::CircleAdd>();
add->fusedActivationFunction(luci::FusedActFunc::NONE);
add->x(addv2->inputs(0));
add->y(addv2->inputs(1));
+ add->name(name + "/Add");
+ luci::add_origin(add, luci::get_origin(addv2));
+
auto customOut = loco::succs(addv2);
assert(customOut.size() == 1);
replace(*customOut.begin()).with(add);
if (not cop)
continue;
- changed |= resolve_custom_op(cop);
+ if (resolve_custom_op(cop))
+ changed = true;
}
return changed;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpAddPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveCustomOpAddPassTest, name)
+{
+ luci::ResolveCustomOpAddPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
#include "flatbuffers/flexbuffers.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
if (custom_code == "BatchMatMulV2")
{
+ auto name = cop->name();
+ assert(name.length() > 0);
+
auto batch_matmul = cop->graph()->nodes()->create<luci::CircleBatchMatMul>();
// input
batch_matmul->x(cop->inputs(0));
auto map = flexbuffers::GetRoot(custom_options).AsMap();
batch_matmul->adj_x(map["adj_x"].AsBool());
batch_matmul->adj_y(map["adj_y"].AsBool());
+ batch_matmul->name(name + "/BatchMatMul");
+ luci::add_origin(batch_matmul, luci::get_origin(cop));
+
+ auto customOut = loco::succs(cop);
+ assert(customOut.size() == 1);
+ replace(*customOut.begin()).with(batch_matmul);
- replace(cop).with(batch_matmul);
return true;
}
+
return false;
}
namespace luci
{
+/**
+ * BEFORE
+ * | |
+ * [CircleNode] [CircleNode]
+ * \ /
+ * [CircleCustom]("BatchMatMulV2")
+ * |
+ * [CircleCustomOut]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ * | |
+ * [CircleNode] [CircleNode]
+ * \ /
+ * [CircleBatchMatMul]
+ * |
+ * [CircleNode]
+ * |
+ */
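+/**
+ * @note The flexbuffer custom options "adj_x" and "adj_y" of the Custom node are copied onto
+ *       CircleBatchMatMul::adj_x/adj_y; the BatchMatmulV2Graphlet in the pass's unit test
+ *       shows how such options are built with flexbuffers::Builder.
+ */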
bool ResolveCustomOpBatchMatMulPass::run(loco::Graph *g)
{
bool changed = false;
if (not cop)
continue;
- changed |= resolve_custom_op(cop);
+ if (resolve_custom_op(cop))
+ changed = true;
}
return changed;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/flexbuffers.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+const int N = 1;
+const int C = 2;
+const int H_X = 1;
+const int W_X = 4;
+const int H_Y = 4;
+const int W_Y = 4;
+
+/**
+ * graph having Custom operator BatchMatMulV2
+ *
+ * [CircleInput] [CircleInput]
+ * \ /
+ * [CircleCustom]
+ * |
+ * [CircleCustomOut]
+ * |
+ * [CircleOutput]
+ */
+class BatchMatmulV2Graphlet
+{
+public:
+ BatchMatmulV2Graphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ // custom option
+ auto flatbuffer_builder =
+ std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
+ auto flex_buffers = std::make_unique<flexbuffers::Builder>();
+ size_t map_start = flex_buffers->StartMap();
+ flex_buffers->Bool("adj_x", false);
+ flex_buffers->Bool("adj_y", false);
+ flex_buffers->Int("T", 0 /* circle::TensorType_FLOAT32 */);
+ flex_buffers->EndMap(map_start);
+ flex_buffers->Finish();
+
+ // CircleCustom(BatchMatMulV2, adj_x=False, adj_y=False)
+ _batchmatmulv2 = g->nodes()->create<luci::CircleCustom>(2, 1);
+ _batchmatmulv2->custom_code("BatchMatMulV2");
+ _batchmatmulv2->custom_options(flex_buffers->GetBuffer());
+ _batchmatmulv2->shape({N, C, H_X, W_Y});
+ _batchmatmulv2->dtype(loco::DataType::FLOAT32);
+ _batchmatmulv2->name("batchmatmulv2");
+
+ // CircleCustomOut
+ _batchmatmulv2_out = g->nodes()->create<luci::CircleCustomOut>();
+ _batchmatmulv2_out->shape({N, C, H_X, W_Y});
+ _batchmatmulv2_out->dtype(loco::DataType::FLOAT32);
+ _batchmatmulv2_out->index(0);
+ }
+
+public:
+ luci::CircleCustom *batchmatmulv2() { return _batchmatmulv2; }
+
+protected:
+ luci::CircleCustom *_batchmatmulv2 = nullptr;
+ luci::CircleCustomOut *_batchmatmulv2_out = nullptr;
+};
+
+class BatchMatmulV2Graph : public TestIsGraphlet<2>,
+ public TestOGraphlet,
+ public BatchMatmulV2Graphlet
+{
+public:
+ BatchMatmulV2Graph() = default;
+
+ void init(void)
+ {
+ TestIsGraphlet<2>::init(g(), {{N, C, H_X, W_X}, {N, C, H_X, W_X}});
+ TestOGraphlet::init(g(), {N, C, H_X, W_Y});
+ BatchMatmulV2Graphlet::init(g());
+
+    // TODO How to set multiple shape vectors for TestIsGraphlet?
+ // update shape for second input
+ input(1)->shape({N, C, H_Y, W_Y});
+
+ // connect graph
+ _batchmatmulv2->inputs(0, input(0));
+ _batchmatmulv2->inputs(1, input(1));
+ _batchmatmulv2_out->input(_batchmatmulv2);
+
+ output()->from(_batchmatmulv2_out);
+ }
+};
+
+class BatchMatmulV2GraphTest : public ::testing::Test
+{
+public:
+ BatchMatmulV2Graph g;
+ luci::ResolveCustomOpBatchMatMulPass pass;
+};
+
+} // namespace
+
+TEST(ResolveCustomOpBatchMatMulPassTest, name)
+{
+ luci::ResolveCustomOpBatchMatMulPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * Optimized graph looks like below.
+ *
+ * [CircleInput]
+ * |
+ * [CircleBatchMatMul]
+ * |
+ * [CircleOutput]
+ */
+TEST_F(BatchMatmulV2GraphTest, simple_test)
+{
+ g.init();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto batchmatmul = dynamic_cast<luci::CircleBatchMatMul *>(g.output()->from());
+ EXPECT_NE(nullptr, batchmatmul);
+
+ auto input_0 = dynamic_cast<luci::CircleInput *>(batchmatmul->x());
+ auto input_1 = dynamic_cast<luci::CircleInput *>(batchmatmul->y());
+ EXPECT_NE(nullptr, input_0);
+ EXPECT_NE(nullptr, input_1);
+}
+
+TEST_F(BatchMatmulV2GraphTest, wrong_condition_NEG)
+{
+ g.init();
+
+ // wrong custom code
+ g.batchmatmulv2()->custom_code("BatchMatMulv2"); // v is lower case
+ auto ret = pass.run(g.g());
+
+ EXPECT_EQ(false, ret);
+}
#include <loco/IR/DataTypeTraits.h>
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <loco.h>
#include <oops/InternalExn.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/TypeInference.h>
namespace
{
node->dim(i) = shape.at(i);
size *= shape.at(i);
}
+ node->shape_status(luci::ShapeStatus::VALID);
#define INIT_VALUES(DT) \
{ \
const auto S32 = loco::DataType::S32;
const auto FLOAT32 = loco::DataType::FLOAT32;
+ auto name = cop->name();
+ assert(name.length() > 0);
+
bool transpose_a = map["transpose_a"].AsBool();
bool transpose_b = map["transpose_b"].AsBool();
loco::Node *rhs = cop->inputs(1);
// Check that the type of the first input is known
- CHECK_OR_FALSE(loco::dtype_known(lhs));
- auto lhs_dtype = loco::dtype_get(cop->inputs(0));
+ auto lhs_dtype = loco::must_cast<luci::CircleNode *>(cop->inputs(0))->dtype();
+ CHECK_OR_FALSE(lhs_dtype != loco::DataType::Unknown);
// If transpose of first input is requested, its shape must be known
- CHECK_OR_FALSE(!transpose_a || loco::shape_known(lhs));
+ auto circle_lhs = loco::must_cast<luci::CircleNode *>(lhs);
+ CHECK_OR_FALSE(!transpose_a || circle_lhs->shape_status() == luci::ShapeStatus::VALID);
// and its rank should be at least 2
- CHECK_OR_FALSE(!transpose_a || loco::shape_get(lhs).as<loco::TensorShape>().rank() >= 2);
+ CHECK_OR_FALSE(!transpose_a || circle_lhs->rank() >= 2);
// Check that the shape of the 2nd input is known
- CHECK_OR_FALSE(loco::shape_known(rhs));
+ auto circle_rhs = loco::must_cast<luci::CircleNode *>(rhs);
+ CHECK_OR_FALSE(circle_rhs->shape_status() == luci::ShapeStatus::VALID);
// TODO as of 06/23/20 TFLite only supports rank 2 for 2nd input. Fix this once that changes!
- CHECK_OR_FALSE(loco::shape_get(rhs).as<loco::TensorShape>().rank() == 2);
+ CHECK_OR_FALSE(circle_rhs->rank() == 2);
// Check that input data type is supported
CHECK_OR_THROW(lhs_dtype == U8 || lhs_dtype == S16 || lhs_dtype == FLOAT32,
"Only UInt8, Int16 and Float32 data types are supported by MatMul");
if (transpose_a)
{
- auto a_shape = loco::shape_get(lhs).as<loco::TensorShape>();
// Create a permutation constant node
std::vector<uint32_t> perm;
- for (uint32_t i = 0; i < a_shape.rank(); ++i)
+ for (uint32_t i = 0; i < circle_lhs->rank(); ++i)
perm.push_back(i);
- std::swap(perm[a_shape.rank() - 1], perm[a_shape.rank() - 2]);
- auto perm_node = create_const_node(graph, S32, {a_shape.rank()}, perm);
+ std::swap(perm[circle_lhs->rank() - 1], perm[circle_lhs->rank() - 2]);
+ auto perm_node = create_const_node(graph, S32, {circle_lhs->rank()}, perm);
+ perm_node->name(name + "/lhs/Transpose/perm");
// Now make a transpose node
auto transpose_node = graph->nodes()->create<luci::CircleTranspose>();
transpose_node->a(lhs);
transpose_node->perm(perm_node);
+ transpose_node->name(name + "/lhs/Transpose");
+ luci::add_origin(transpose_node, luci::get_origin(cop));
lhs = transpose_node;
}
{
const std::vector<uint32_t> perm{1, 0};
auto perm_node = create_const_node(graph, S32, {2}, perm);
+ perm_node->name(name + "/rhs/Transpose/perm");
auto transpose_node = graph->nodes()->create<luci::CircleTranspose>();
transpose_node->a(rhs);
transpose_node->perm(perm_node);
+ transpose_node->name(name + "/rhs/Transpose");
+ luci::add_origin(transpose_node, luci::get_origin(cop));
rhs = transpose_node;
}
- // Make a constant zero-filled bias node
- auto b_shape = loco::shape_get(cop->inputs(1)).as<loco::TensorShape>();
- uint32_t bias_size = b_shape.dim(transpose_b ? 1 : 0).value();
- const std::vector<float> val(bias_size, .0f);
- auto bias_node = create_const_node(graph, lhs_dtype, {bias_size}, val);
+ auto empty_bias = graph->nodes()->create<luci::CircleOutputExclude>();
+ empty_bias->dtype(loco::DataType::FLOAT32); // Needed for type inference
+
auto fc_node = graph->nodes()->create<luci::CircleFullyConnected>();
fc_node->input(lhs);
fc_node->weights(rhs);
- fc_node->bias(bias_node);
+ fc_node->bias(empty_bias);
fc_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ fc_node->name(name + "/FullyConnected");
+ luci::add_origin(fc_node, luci::get_origin(cop));
- replace(cop).with(fc_node);
+ auto customOut = loco::succs(cop);
+ assert(customOut.size() == 1);
+ replace(*customOut.begin()).with(fc_node);
return true;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpMatMulPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveCustomOpMatMulPassTest, name)
+{
+ luci::ResolveCustomOpMatMulPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/ShapeInferencePass.h"
-
-#include <luci/IR/CircleDialect.h>
-#include <luci/Service/CircleShapeInferenceRule.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/CanonicalShapeInferenceRule.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/MultiDialectShapeInferenceRule.h>
-
-namespace luci
-{
-
-bool ShapeInferencePass::run(luci::Module *m)
-{
- bool changed = false;
-
- for (size_t g = 0; g < m->size(); ++g)
- {
- if (run(m->graph(g)))
- changed = true;
- }
-
- return changed;
-}
-
-bool ShapeInferencePass::run(loco::Graph *g)
-{
- loco::CanonicalShapeInferenceRule canonical_rule;
- luci::CircleShapeInferenceRule circle_rule;
-
- loco::MultiDialectShapeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(luci::CircleDialect::get(), &circle_rule);
-
- return loco::apply(&rules).to(g);
-}
-
-} // namespace luci
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/ShapeSignatureInferencePass.h"
-
-#include <luci/IR/CircleShapeSignature.h>
-#include <luci/Service/CircleShapeSignatureInference.h>
-
-#include <loco.h>
-
-namespace luci
-{
-
-bool ShapeSignatureInferencePass::run(luci::Module *m)
-{
- bool changed = false;
-
- for (size_t g = 0; g < m->size(); ++g)
- {
- if (run(m->graph(g)))
- changed = true;
- }
-
- return changed;
-}
-
-bool ShapeSignatureInferencePass::run(loco::Graph *g)
-{
- luci::ssinf::Rule signature_inference_rule;
- bool changed = false;
-
- for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
- {
- luci::ShapeSignature shape_signature;
-
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- if (signature_inference_rule.infer(circle_node, shape_signature))
- {
- if (!(circle_node->shape_signature() == shape_signature))
- {
- circle_node->shape_signature(shape_signature);
- changed = true;
- }
- }
- }
-
- return changed;
-}
-
-} // namespace luci
{
auto the_weights = loco::must_cast<luci::CircleConst *>(fc->weights());
+ auto name = fc->name();
+ assert(name.length() > 0);
+
// create CircleConst where shuffled data will be stored
luci::CircleConst *new_weights = fc->graph()->nodes()->create<luci::CircleConst>();
new_weights->dtype(loco::DataType::FLOAT32);
{
new_weights->dim(r).set(the_weights->dim(r).value());
}
+ new_weights->name(name + "/shuffle_weight");
  // shuffle weights
const uint32_t MULTIPLE = 16;
for (uint32_t i = 0; i < MULTIPLE; i++)
{
new_weights->at<loco::DataType::FLOAT32>(index++) =
- the_weights->at<loco::DataType::FLOAT32>((r * MULTIPLE + i) * cols + c);
+ the_weights->at<loco::DataType::FLOAT32>((r * MULTIPLE + i) * cols + c);
}
}
}
fc->weights(new_weights);
fc->weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32);
}
+
+ changed = true;
}
return changed;
#include <luci/IR/CircleNodes.h>
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
#include <gtest/gtest.h>
-void create_fc_net(loco::Graph *g)
+namespace
{
- assert(g);
-
- const uint32_t ROW = 16;
- const uint32_t COL = 2;
- const uint32_t elements_num = ROW * COL;
-
- // input
- auto input = g->nodes()->create<luci::CircleInput>();
- auto graph_input = g->inputs()->create();
- input->index(graph_input->index());
-
- // fc weights
- auto weights = g->nodes()->create<luci::CircleConst>();
- weights->dtype(loco::DataType::FLOAT32);
- weights->size<loco::DataType::FLOAT32>(elements_num);
- weights->rank(2);
- weights->dim(0).set(ROW);
- weights->dim(1).set(COL);
- for (uint32_t idx = 0; idx < elements_num; idx++)
+
+using namespace luci::test;
+
+class FCGraphlet
+{
+public:
+ FCGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 wshape)
{
- weights->at<loco::DataType::FLOAT32>(idx) = idx;
+ const uint32_t elements_num = num_elements(wshape);
+
+ // fc weights
+ _weights = g->nodes()->create<luci::CircleConst>();
+ _weights->dtype(loco::DataType::FLOAT32);
+ _weights->shape(wshape);
+ _weights->size<loco::DataType::FLOAT32>(elements_num);
+ for (uint32_t idx = 0; idx < elements_num; idx++)
+ {
+ _weights->at<loco::DataType::FLOAT32>(idx) = idx;
+ }
+ _weights->name("weights");
+
+ // fc
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->dtype(loco::DataType::FLOAT32);
+ _fc->name("fc");
}
- // fc
- auto fc = g->nodes()->create<luci::CircleFullyConnected>();
- fc->dtype(loco::DataType::FLOAT32);
- fc->input(input);
- fc->weights(weights);
-
- // output
- auto output = g->nodes()->create<luci::CircleOutput>();
- output->from(fc);
- auto graph_output = g->outputs()->create();
- output->index(graph_output->index());
-}
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleConst *_weights = nullptr;
+};
-TEST(ShuffleWeightTo16x1Float32PassTest, SimpleTest1)
+class FCGraph : public TestIGraphlet, public TestOGraphlet, public FCGraphlet
{
- auto graph = loco::make_graph();
- create_fc_net(graph.get());
+public:
+ FCGraph() = default;
- luci::CircleFullyConnected *fc_node = nullptr;
- for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ void init(const ShapeU32 shape, const ShapeU32 wshape)
{
- auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
- if (not fc)
- continue;
+ TestIGraphlet::init(g(), shape);
+ TestOGraphlet::init(g(), shape);
+ FCGraphlet::init(g(), wshape);
+
+ // connect graph
+ _fc->input(input());
+ _fc->weights(_weights);
- fc_node = fc;
- break;
+ output()->from(_fc);
}
+};
+
+} // namespace
+
+TEST(ShuffleWeightTo16x1Float32PassTest, name)
+{
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+const uint32_t ROW = 16;
+const uint32_t COL = 2;
+
+TEST(ShuffleWeightTo16x1Float32PassTest, SimpleTest1)
+{
+ FCGraph g;
+
+ g.init({ROW, COL}, {ROW, COL});
+
+ auto fc_node = luci::test::first_node<luci::CircleFullyConnected>(g.g());
ASSERT_NE(fc_node, nullptr);
auto weights = loco::must_cast<luci::CircleConst *>(fc_node->weights());
// before
ASSERT_EQ(15, weights->at<loco::DataType::FLOAT32>(15));
luci::ShuffleWeightTo16x1Float32Pass pass;
- while (pass.run(graph.get()))
+ while (pass.run(g.g()))
;
weights = loco::must_cast<luci::CircleConst *>(fc_node->weights());
ASSERT_EQ(28, weights->at<loco::DataType::FLOAT32>(14));
ASSERT_EQ(30, weights->at<loco::DataType::FLOAT32>(15));
}
+
+TEST(ShuffleWeightTo16x1Float32PassTest, invalid_weight_shape_NEG)
+{
+ FCGraph g;
+
+ g.init({ROW, COL}, {1, ROW, COL, 1});
+
+ auto fc_node = luci::test::first_node<luci::CircleFullyConnected>(g.g());
+ ASSERT_NE(fc_node, nullptr);
+
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ auto ret = pass.run(g.g());
+
+ ASSERT_FALSE(ret);
+}
+
+TEST(ShuffleWeightTo16x1Float32PassTest, invalid_weight_row16_NEG)
+{
+ FCGraph g;
+
+ g.init({COL, ROW}, {COL, ROW});
+
+ auto fc_node = luci::test::first_node<luci::CircleFullyConnected>(g.g());
+ ASSERT_NE(fc_node, nullptr);
+
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ auto ret = pass.run(g.g());
+
+ ASSERT_FALSE(ret);
+}
const std::vector<DimensionType> &format,
const std::vector<int32_t> &block_size,
const std::vector<int32_t> &block_map)
- : _dense_shape(shape), _traversal_order(traversal_order), _block_size(block_size),
- _block_map(block_map)
+ : _dense_shape(shape), _traversal_order(traversal_order), _block_size(block_size),
+ _block_map(block_map)
{
_dense_size = 1;
int32_t block_dim = 0;
const std::vector<int32_t> block_size = {4, 1};
const std::vector<int32_t> block_map = {0, 1};
EXPECT_THROW(
- luci::Sparsifier<int32_t>(dense_shape, traversal_order, format, block_size, block_map),
- std::out_of_range);
+ luci::Sparsifier<int32_t>(dense_shape, traversal_order, format, block_size, block_map),
+ std::out_of_range);
}
else if (_format.at(idx) == DimensionType::SPARSE_CSR)
{
sparsityparam->dim_metadata.emplace_back(
- DimensionType::SPARSE_CSR, /* dense size */ 0,
- /* array_segments */ SparseIndexVector{SparseIndexVectorType::U16,
- dim_metadata.at(idx * 2)},
- /* array_indices */ SparseIndexVector{SparseIndexVectorType::U16,
- dim_metadata.at(idx * 2 + 1)});
+ DimensionType::SPARSE_CSR, /* dense size */ 0,
+ /* array_segments */
+ SparseIndexVector{SparseIndexVectorType::U16, dim_metadata.at(idx * 2)},
+ /* array_indices */
+ SparseIndexVector{SparseIndexVectorType::U16, dim_metadata.at(idx * 2 + 1)});
}
}
for (uint32_t i = 0; i < _block_size.size(); i++)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SparsifyTensorPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(SparsifyTensorPassTest, name)
+{
+ std::vector<int32_t> to;
+ std::vector<luci::DimensionType> vdt;
+ std::vector<int32_t> bs;
+ std::vector<int32_t> bm;
+ luci::SparsifyTensorPass pass("", to, vdt, bs, bm);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
#include "luci/Pass/SubstitutePackToReshapePass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
namespace
{
+int32_t unknown_dim_count(luci::CircleNode *node)
+{
+ int32_t count = 0;
+
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ if (!node->dim(i).known())
+ ++count;
+
+ return count;
+}
+
bool substitute_pack_to_reshape(luci::CircleNode *node)
{
auto target_node = dynamic_cast<luci::CirclePack *>(node);
if (axis < 0)
axis = axis + static_cast<int32_t>(value_node->rank()) + 1;
+ auto name = node->name();
+ assert(name.length() > 0);
+
auto graph = target_node->graph();
auto reshape_node = graph->nodes()->create<luci::CircleReshape>();
reshape_node->tensor(value_node);
+ reshape_node->name(name + "/Reshape");
+ luci::add_origin(reshape_node, luci::get_origin(node));
auto const_node = graph->nodes()->create<luci::CircleConst>();
const_node->dtype(loco::DataType::S32);
}
else if (i < axis)
{
- const_node->at<loco::DataType::S32>(i) = value_node->dim(i).value();
+ const_node->at<loco::DataType::S32>(i) =
+ value_node->dim(i).known() ? value_node->dim(i).value() : -1;
}
else
{
- const_node->at<loco::DataType::S32>(i) = value_node->dim(i - 1).value();
+ const_node->at<loco::DataType::S32>(i) =
+ value_node->dim(i - 1).known() ? value_node->dim(i - 1).value() : -1;
}
}
+ const_node->name(name + "/Reshape/shape");
reshape_node->shape(const_node);
replace(target_node).with(reshape_node);
return true;
{
/**
- * BEFORE
- * |
- * [CircleNode]
- * |
- * [CirclePack]
- * |
- * [CircleNode]
- * |
+ * BEFORE
+ * |
+ * [CircleNode]
+ * |
+ * [CirclePack]
+ * |
+ * [CircleNode]
+ * |
*
- * AFTER
- * |
- * [CircleNode] [CircleConst]
- * \ /
- * [CircleReshape]
+ * AFTER
* |
- * [CircleNode]
- * |
- *
+ * [CircleNode] [CircleConst]
+ * | \ /
+ * [CirclePack] [CircleReshape]
+ * |
+ * [CircleNode]
+ * |
*/
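+/**
+ * @note Illustrative example (not taken from the original sources): packing a single tensor
+ *       of shape [2, 3] with axis 0 is rewritten as a Reshape to [1, 2, 3]; with axis 1 the
+ *       new shape constant becomes [2, 1, 3]. Unknown input dimensions are written as -1.
+ */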
bool SubstitutePackToReshapePass::run(loco::Graph *g)
{
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- if (substitute_pack_to_reshape(circle_node))
+ if (unknown_dim_count(circle_node) <= 1 && substitute_pack_to_reshape(circle_node))
{
changed = true;
}
namespace
{
-/**
- * BEFORE
- * |
- * [CircleNode]
- * |
- * [CirclePack]
- * |
- * [CircleNode]
- * |
- *
- * AFTER
- * |
- * [CircleNode] [CircleConst]
- * \ /
- * [CircleReshape]
- * |
- * [CircleNode]
- * |
- *
- */
void create_substitute_pack_to_reshape(loco::Graph *g, const std::initializer_list<uint32_t> shape,
int32_t axis)
{
input->shape_status(luci::ShapeStatus::VALID);
input->rank(shape.size());
input->shape(shape);
+ input->name("input");
// Pack Node create.
auto pack = g->nodes()->create<luci::CirclePack>(1);
pack->values(0, input);
pack->axis(axis);
+ pack->name("pack");
// Output Connect.
auto output = g->nodes()->create<luci::CircleOutput>();
output->from(pack);
auto graph_output = g->outputs()->create();
output->index(graph_output->index());
+ output->name("output");
return;
}
} // namespace
+TEST(SubstitutePackToReshapePassTest, name)
+{
+ luci::SubstitutePackToReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
TEST(SubstitutePackToReshapePass, simple_case)
{
auto graph = loco::make_graph();
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteSqueezeToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+/**
+ * @brief return TRUE if all dims are known
+ * @note  This pass could be applied even when some of the dimensions are unknown.
+ *        For now, such cases are not handled; the logic can be updated later.
+ */
+bool can_squeeze_shape(const luci::CircleNode *node)
+{
+ for (uint32_t r = 0; r < node->rank(); ++r)
+ {
+ if (not node->dim(r).known())
+ return false;
+ }
+ return true;
+}
+
+/**
+ * @brief return a valid unsigned dim value in the range 0 .. (rank-1)
+ * @note  dim may range from -rank to (rank-1)
+ */
+uint32_t valid_unsigned_dim(uint32_t rank, int32_t dim)
+{
+ int32_t irank = static_cast<int32_t>(rank);
+ return dim >= 0 ? static_cast<uint32_t>(dim) : static_cast<uint32_t>(irank + dim);
+}
+
+/**
+ * @brief return TRUE if the input dim is 1 for every value in squeeze_dims
+ */
+bool is_valid_input(const luci::CircleNode *node, const std::vector<int32_t> &squeeze_dims)
+{
+ auto rank = node->rank();
+ for (auto dim : squeeze_dims)
+ {
+ auto udim = valid_unsigned_dim(rank, dim);
+ if (node->dim(udim).value() != 1)
+ return false;
+ }
+ return true;
+}
+
+/**
+ * @brief return shape vector from input
+ */
+std::vector<uint32_t> node_shape(const luci::CircleNode *input)
+{
+ std::vector<uint32_t> shape;
+ uint32_t rank = input->rank();
+ for (uint32_t r = 0; r < rank; ++r)
+ shape.push_back(input->dim(r).value());
+
+ return shape;
+}
+
+/**
+ * @brief return CircleConst ptr with values of new_shape
+ */
+luci::CircleConst *create_shape_const(loco::Graph *graph, const std::vector<uint32_t> &new_shape)
+{
+ // NOTE dim_size can be 0
+ uint32_t dim_size = static_cast<uint32_t>(new_shape.size());
+
+ auto shape_const = graph->nodes()->create<luci::CircleConst>();
+
+ // const shape/dtype
+ shape_const->dtype(loco::DataType::S32);
+ if (dim_size > 0)
+ {
+ shape_const->rank(1);
+ shape_const->dim(0).set(dim_size);
+ }
+ else
+ shape_const->rank(0);
+ shape_const->shape_status(luci::ShapeStatus::VALID);
+
+ // constant values
+ shape_const->size<loco::DataType::S32>(dim_size);
+ for (uint32_t i = 0; i < dim_size; ++i)
+ shape_const->at<loco::DataType::S32>(i) = new_shape.at(i);
+
+ return shape_const;
+}
+
+bool substitute_squeeze_to_reshape(luci::CircleSqueeze *squeeze)
+{
+ assert(squeeze != nullptr);
+
+ auto input = loco::must_cast<luci::CircleNode *>(squeeze->input());
+  // the input node shape is needed and all dims should be known
+ if (input->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+ if (not can_squeeze_shape(input))
+ return false;
+
+ // we will use squeeze shape for new shape
+ if (squeeze->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+
+ auto squeeze_dims = squeeze->squeeze_dims();
+ if (not is_valid_input(input, squeeze_dims))
+ throw std::runtime_error("Invalid values in squeeze_dims: " + squeeze->name());
+
+ auto name = squeeze->name();
+ assert(name.length() > 0);
+
+ auto reshape_shape = node_shape(squeeze);
+ auto graph = squeeze->graph();
+ auto reshape = graph->nodes()->create<luci::CircleReshape>();
+ auto shape_const = create_shape_const(graph, reshape_shape);
+ reshape->name(name + "/Reshape");
+ luci::add_origin(reshape, luci::get_origin(squeeze));
+ shape_const->name(name + "/Reshape/shape");
+
+ // graph connection
+ reshape->tensor(input);
+ reshape->shape(shape_const);
+ replace(squeeze).with(reshape);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode]
+ * |
+ * [CircleSqueeze]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode] [CircleConst]
+ * | \ /
+ * [CircleSqueeze] [CircleReshape]
+ * |
+ * [CircleNode]
+ * |
+ */
+bool SubstituteSqueezeToReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto squeeze = dynamic_cast<luci::CircleSqueeze *>(node))
+ {
+ if (substitute_squeeze_to_reshape(squeeze))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
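
The Reshape created above takes its target shape verbatim from the shape-inferred CircleSqueeze output (node_shape plus create_shape_const). The standalone sketch below is not part of the patch and uses hypothetical names; it only reproduces the squeeze shape rule in plain C++ so the expected shape-const values in the tests that follow are easy to check by hand.

#include <algorithm>
#include <cstdint>
#include <stdexcept>
#include <vector>

// Sketch only: mirrors what shape inference + node_shape() yield for a Squeeze.
// An empty squeeze_dims drops every dimension of size 1; otherwise only the
// listed dimensions are dropped, and each of them must have size 1.
std::vector<int32_t> squeezed_shape(const std::vector<int32_t> &in,
                                    std::vector<int32_t> squeeze_dims)
{
  const auto rank = static_cast<int32_t>(in.size());
  for (auto &d : squeeze_dims)
    d = d >= 0 ? d : d + rank; // normalize negative dims, as valid_unsigned_dim() does

  std::vector<int32_t> out;
  for (int32_t i = 0; i < rank; ++i)
  {
    const bool listed =
      std::find(squeeze_dims.begin(), squeeze_dims.end(), i) != squeeze_dims.end();
    if (listed && in[i] != 1)
      throw std::runtime_error("Invalid values in squeeze_dims");
    if (listed || (squeeze_dims.empty() && in[i] == 1))
      continue; // dimension is squeezed away
    out.push_back(in[i]);
  }
  return out;
}
// e.g. squeezed_shape({1, 16, 1, 1}, {2, 3}) -> {1, 16}
//      squeezed_shape({1, 16, 1, 1}, {})     -> {16}
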
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/SubstituteSqueezeToReshapePass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using uilist = std::initializer_list<uint32_t>;
+using ilist = std::initializer_list<int32_t>;
+
+class PassTestGraph
+{
+public:
+ PassTestGraph() = default;
+
+public:
+ void init(const uilist shape_in, const uilist shape_out)
+ {
+ _graph_input = _g.inputs()->create();
+ _graph_output = _g.outputs()->create();
+
+ _input = _g.nodes()->create<luci::CircleInput>();
+ _input->shape(shape_in);
+ _input->shape_status(luci::ShapeStatus::VALID);
+ _input->name("input");
+
+ _output = _g.nodes()->create<luci::CircleOutput>();
+ _output->shape(shape_out);
+ _output->shape_status(luci::ShapeStatus::VALID);
+ _output->name("output");
+
+ _input->index(_graph_input->index());
+ _output->index(_graph_output->index());
+
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ set(input_shape.get(), shape_in);
+ _graph_input->shape(std::move(input_shape));
+
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ set(output_shape.get(), shape_out);
+ _graph_output->shape(std::move(output_shape));
+ }
+
+protected:
+ void set(loco::TensorShape *shape, const uilist &values)
+ {
+ uint32_t r = 0;
+ shape->rank(values.size());
+ for (auto v : values)
+ shape->dim(r++).set(v);
+ }
+
+public:
+ loco::Graph *g(void) { return &_g; }
+ luci::CircleOutput *output(void) { return _output; }
+
+protected:
+ loco::Graph _g;
+ loco::GraphInput *_graph_input = nullptr;
+ loco::GraphOutput *_graph_output = nullptr;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+class SubstituteSqueezeToReshapeGraph : public PassTestGraph
+{
+public:
+ SubstituteSqueezeToReshapeGraph() = default;
+
+public:
+ void init(const uilist shape_in, const uilist shape_out, const ilist squeeze_dims)
+ {
+ PassTestGraph::init(shape_in, shape_out);
+
+ _squeeze = _g.nodes()->create<luci::CircleSqueeze>();
+ _squeeze->input(_input);
+ _squeeze->squeeze_dims(squeeze_dims);
+ _squeeze->name("squeeze");
+
+ _output->from(_squeeze);
+ }
+
+protected:
+ luci::CircleSqueeze *_squeeze = nullptr;
+};
+
+class SubstituteSqueezeToReshapeTest : public ::testing::Test
+{
+public:
+ SubstituteSqueezeToReshapeTest() = default;
+
+ void run_pass(void)
+ {
+ while (_shapeinf.run(_graph.g()) || _pass.run(_graph.g()))
+ ;
+ }
+
+protected:
+ SubstituteSqueezeToReshapeGraph _graph;
+ luci::SubstituteSqueezeToReshapePass _pass;
+ luci::CircleShapeInferencePass _shapeinf;
+};
+
+} // namespace
+
+TEST(SubstituteSqueezeToReshapePassTest, name)
+{
+ luci::SubstituteSqueezeToReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, simple_with_squeeze_dims)
+{
+ _graph.init({1, 16, 1, 1}, {1, 16}, {2, 3});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+ auto reshape_shape = loco::must_cast<luci::CircleConst *>(reshape->shape());
+ ASSERT_EQ(2, reshape_shape->size<loco::DataType::S32>());
+ ASSERT_EQ(1, reshape_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(16, reshape_shape->at<loco::DataType::S32>(1));
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, simple_without_squeeze_dims)
+{
+ _graph.init({1, 16, 1, 1}, {16}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+ auto reshape_shape = loco::must_cast<luci::CircleConst *>(reshape->shape());
+ ASSERT_EQ(1, reshape_shape->size<loco::DataType::S32>());
+ ASSERT_EQ(16, reshape_shape->at<loco::DataType::S32>(0));
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, input_with_0_dims)
+{
+ _graph.init({1, 16, 0, 1}, {16, 0}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+ auto reshape_shape = loco::must_cast<luci::CircleConst *>(reshape->shape());
+ ASSERT_EQ(2, reshape_shape->size<loco::DataType::S32>());
+ ASSERT_EQ(16, reshape_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(0, reshape_shape->at<loco::DataType::S32>(1));
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, nothing_to_squeeze)
+{
+ _graph.init({2, 16, 16, 3}, {2, 16, 16, 3}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, all_to_squeeze)
+{
+ _graph.init({1, 1}, {}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, wrong_squeeze_dims_NEG)
+{
+ _graph.init({1, 16, 1, 1}, {1, 16, 1, 1}, {1});
+
+ // shape inference will throw for invalid squeeze_dims
+ EXPECT_THROW(run_pass(), std::exception);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteTransposeToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+/**
+ * @brief Convert a Transpose op to a Reshape op under certain conditions
+ * @details The Transpose op is converted when all of the conditions below hold:
+ *          1. its perm input is a CircleConst.
+ *          2. its input has fewer than two unknown dimensions.
+ *          3. ignoring dimensions of value 1, the remaining dimensions keep the
+ *             same order on the input and the output
+ *             eg) input shape = (126, 201, 1, 1) => (126, 201)
+ *                 output shape = (1, 126, 1, 201) => (126, 201)
+ */
+bool substitute_transpose_to_reshape(luci::CircleTranspose *node)
+{
+ auto perm_const = dynamic_cast<luci::CircleConst *>(node->perm());
+ if (perm_const == nullptr)
+ return false;
+
+ assert(perm_const->dtype() == loco::DataType::S32);
+
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
+ if (perm_const->dim(0).value() != input_node->rank())
+ return false;
+
+  // If the input has more than one unknown dimension, the transpose is not changed.
+ int count = 0;
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ if (!input_node->dim(i).known())
+ count++;
+ if (count > 1)
+ return false;
+
+ uint32_t idx = 0;
+ auto size_items = perm_const->size<loco::DataType::S32>();
+ for (uint32_t i = 0; i < size_items; i++)
+ {
+ assert(perm_const->at<loco::DataType::S32>(i) >= 0 &&
+ perm_const->at<loco::DataType::S32>(i) < static_cast<int32_t>(input_node->rank()));
+ const auto perm_value = static_cast<uint32_t>(perm_const->at<loco::DataType::S32>(i));
+ if (input_node->dim(perm_value).known() && input_node->dim(perm_value).value() == 1)
+ continue;
+    // check that the indices of non-1 dims appear in increasing order
+ if (idx > perm_value)
+ return false;
+ idx = perm_value;
+ }
+
+ auto name = node->name();
+ assert(name.length() > 0);
+
+ auto new_const_node = node->graph()->nodes()->create<luci::CircleConst>();
+ new_const_node->dtype(loco::DataType::S32);
+ new_const_node->size<loco::DataType::S32>(size_items);
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ new_const_node->rank(1);
+ new_const_node->dim(0).set(size_items);
+ for (uint32_t i = 0; i < size_items; i++)
+ {
+ if (input_node->dim(static_cast<uint32_t>(perm_const->at<loco::DataType::S32>(i))).known())
+ new_const_node->at<loco::DataType::S32>(i) = static_cast<int32_t>(
+ input_node->dim(static_cast<uint32_t>(perm_const->at<loco::DataType::S32>(i))).value());
+ else
+ new_const_node->at<loco::DataType::S32>(i) = -1;
+ }
+
+ auto new_reshape_node = node->graph()->nodes()->create<luci::CircleReshape>();
+ new_reshape_node->tensor(input_node);
+ new_reshape_node->shape(new_const_node);
+ new_reshape_node->name(name + "/Reshape");
+ luci::add_origin(new_reshape_node, luci::get_origin(node));
+ new_const_node->name(name + "/Reshape/shape");
+
+ replace(node).with(new_reshape_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleReshape]
+ * |
+ * [CircleNode]
+ *
+ */
+bool SubstituteTransposeToReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto circle_node = dynamic_cast<luci::CircleTranspose *>(node))
+ {
+ if (substitute_transpose_to_reshape(circle_node))
+ {
+ changed = true;
+ }
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
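
Condition 3 of the comment above is what the perm loop checks: a Transpose degenerates to a Reshape exactly when the permutation leaves the order of the non-1 dimensions untouched. A minimal standalone sketch of that predicate follows; it is illustrative only, the names are mine, and it assumes every dimension is known.

#include <cstdint>
#include <vector>

// Sketch of the order check performed above: size-1 dims may move anywhere,
// but the remaining dims must keep their original (increasing) positions.
bool transpose_is_reshape(const std::vector<uint32_t> &shape, const std::vector<int32_t> &perm)
{
  uint32_t last_kept = 0;
  for (auto p : perm)
  {
    const auto dim = static_cast<uint32_t>(p);
    if (shape[dim] == 1)
      continue; // size-1 dimensions can be reordered freely
    if (dim < last_kept)
      return false; // a non-1 dimension moved ahead of an earlier one
    last_kept = dim;
  }
  return true;
}
// transpose_is_reshape({126, 201, 1, 1}, {2, 0, 3, 1}) -> true  (126, 201 keep their order)
// transpose_is_reshape({126, 201, 1, 1}, {2, 1, 3, 0}) -> false (order becomes 201, 126)
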
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/SubstituteTransposeToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class SubstituteTransposeToReshapeTest : public ::testing::Test
+{
+public:
+ SubstituteTransposeToReshapeTest() {}
+
+ void buildGraph(const std::initializer_list<uint32_t> shape, const std::vector<int32_t> perm)
+ {
+ // Input Create.
+ input = g.nodes()->create<luci::CircleInput>();
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->rank(shape.size());
+ input->shape(shape);
+ input->name("input");
+
+ // Permutation Create.
+ auto perm_const = g.nodes()->create<luci::CircleConst>();
+ perm_const->dtype(loco::DataType::S32);
+ perm_const->size<loco::DataType::S32>(perm.size());
+ perm_const->shape_status(luci::ShapeStatus::VALID);
+ perm_const->rank(1);
+ perm_const->dim(0).set(perm.size());
+ for (uint32_t i = 0; i < static_cast<uint32_t>(perm.size()); i++)
+ {
+ perm_const->at<loco::DataType::S32>(i) = perm.at(i);
+ }
+ perm_const->name("perm_const");
+
+ // Transpose Create.
+ auto transpose_node = g.nodes()->create<luci::CircleTranspose>();
+ transpose_node->a(input);
+ transpose_node->perm(perm_const);
+ transpose_node->name("transpose_node");
+
+ // Output Connect.
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->from(transpose_node);
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+ output->name("output");
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(SubstituteTransposeToReshapePassTest, name)
+{
+ luci::SubstituteTransposeToReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(SubstituteTransposeToReshapeTest, simple_case)
+{
+  // Create a graph that transposes input {126, 201, 1, 1} with permutation {2, 0, 3, 1}
+ buildGraph({126, 201, 1, 1}, std::vector<int32_t>({2, 0, 3, 1}));
+ // With this input shape and permutation values, output shape will be [1, 126, 1, 201].
+ // The order of non-one values is unchanged (126, 201).
+ // So this Transpose op can be converted to Reshape op.
+ luci::SubstituteTransposeToReshapePass pass;
+ while (pass.run(&g))
+ ;
+
+ auto reshape_node = dynamic_cast<luci::CircleReshape *>(output->from());
+ auto transpose_node = dynamic_cast<luci::CircleTranspose *>(output->from());
+ ASSERT_NE(nullptr, reshape_node);
+ ASSERT_EQ(nullptr, transpose_node);
+ auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape());
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(126, new_shape->at<loco::DataType::S32>(1));
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(2));
+ ASSERT_EQ(201, new_shape->at<loco::DataType::S32>(3));
+}
+
+TEST_F(SubstituteTransposeToReshapeTest, failed_to_substitute_NEG)
+{
+  // Create a graph that transposes input {126, 201, 1, 1} with permutation {2, 1, 3, 0}
+ buildGraph({126, 201, 1, 1}, std::vector<int32_t>({2, 1, 3, 0}));
+ // With this input shape and permutation values, output shape will be [1, 201, 1, 126].
+ // The order of non-one values is changed (126, 201) -> (201, 126).
+ // So this Transpose op cannot be converted to Reshape op.
+ luci::SubstituteTransposeToReshapePass pass;
+ while (pass.run(&g))
+ ;
+
+ auto reshape_node = dynamic_cast<luci::CircleReshape *>(output->from());
+ auto transpose_node = dynamic_cast<luci::CircleTranspose *>(output->from());
+ ASSERT_EQ(nullptr, reshape_node);
+ ASSERT_NE(nullptr, transpose_node);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/TransformMinMaxToRelu6Pass.h"
+
+#include "helpers/NodeFiller.h"
+#include "helpers/TypeMapper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+template <loco::DataType DT>
+bool is_scalar_with_value(luci::CircleConst *node, typename loco::DataTypeImpl<DT>::Type val)
+{
+ if (node->dtype() != DT)
+ return false;
+ if (node->rank() != 0)
+ return false;
+ if (node->size<DT>() != 1)
+ return false;
+ if (node->at<DT>(0) != static_cast<typename loco::DataTypeImpl<DT>::Type>(val))
+ return false;
+
+ return true;
+}
+
+/**
+ * BEFORE
+ * [CircleNode]
+ * |
+ * [CircleMinimum]
+ * |
+ * [CircleMaximum]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleRelu6]
+ * |
+ * [CircleNode]
+ *
+ * NOTE Only max(min(input, 6), 0) pattern will be transformed.
+ */
+template <loco::DataType DT> bool transform_min_max_pattern(luci::CircleMaximum *maxi)
+{
+ if (not maxi)
+ return false;
+
+ if (maxi->dtype() != DT)
+ return false;
+
+ luci::CircleConst *maxi_const = nullptr;
+ luci::CircleMinimum *mini = nullptr;
+
+ // There are two ways Maximum takes inputs.
+ // 1. Maximum(x = CircleConst, y = CircleMinimum)
+ // 2. Maximum(x = CircleMinimum, y = CircleConst)
+ if (not luci::fill(&maxi_const, &mini).with_commutative_args_of(maxi))
+ return false;
+
+  // Maximum constant should be a scalar whose value is 0.
+ if (not is_scalar_with_value<DT>(maxi_const,
+ static_cast<typename loco::DataTypeImpl<DT>::Type>(0)))
+ return false;
+
+ luci::CircleConst *mini_const = nullptr;
+ loco::Node *mini_input = nullptr;
+
+  // There are two ways Minimum takes inputs.
+  // 1. Minimum(x = CircleConst, y = CircleNode)
+  // 2. Minimum(x = CircleNode, y = CircleConst)
+ if (not luci::fill(&mini_const, &mini_input).with_commutative_args_of(mini))
+ return false;
+
+  // Minimum constant should be a scalar whose value is 6.
+ if (not is_scalar_with_value<DT>(mini_const,
+ static_cast<typename loco::DataTypeImpl<DT>::Type>(6)))
+ return false;
+
+ auto name = maxi->name();
+ assert(name.length() > 0);
+
+ // Create Relu6 op
+ auto relu6 = mini->graph()->nodes()->create<luci::CircleRelu6>();
+ relu6->features(mini_input);
+ relu6->name(name + "/Relu6");
+ luci::add_origin(relu6, luci::composite_origin({luci::get_origin(maxi), luci::get_origin(mini)}));
+
+ replace(maxi).with(relu6);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool TransformMinMaxToRelu6Pass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto maxi = dynamic_cast<luci::CircleMaximum *>(node))
+ {
+ if (transform_min_max_pattern<loco::DataType::FLOAT32>(maxi))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
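
The rewrite is value-preserving because max(min(x, 6), 0) clamps x to [0, 6], which is exactly what Relu6 computes. A tiny self-contained check of that identity on a few sample values (not part of the patch):

#include <algorithm>
#include <cassert>
#include <initializer_list>

float relu6_ref(float x) { return std::min(std::max(x, 0.0f), 6.0f); }

int main()
{
  // The Minimum/Maximum pattern matched by the pass computes the same clamp.
  for (float x : {-3.0f, 0.0f, 2.5f, 6.0f, 10.0f})
    assert(std::max(std::min(x, 6.0f), 0.0f) == relu6_ref(x));
  return 0;
}
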
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/TransformMinMaxToRelu6Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Minimum-Maximum pattern graph
+ *
+ * [CircleInput] [CircleConst]
+ * \ /
+ * [CircleMinimum] [CircleConst]
+ * | /
+ * [CircleMaximum]
+ * |
+ * [CircleOutput]
+ */
+struct MinMaxGraph
+{
+ loco::Graph _g;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleMinimum *_mini = nullptr;
+ luci::CircleConst *_mini_const = nullptr;
+ luci::CircleMaximum *_maxi = nullptr;
+ luci::CircleConst *_maxi_const = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+class TransformMinMaxToRelu6PassTest : public ::testing::Test
+{
+protected:
+ virtual void SetUp()
+ {
+ const int N = 1;
+ const int H = 4;
+ const int W = 4;
+ const int C = 3;
+
+ // graph input and output
+ auto graph_input = _min_max_g._g.inputs()->create();
+ auto graph_output = _min_max_g._g.outputs()->create();
+
+ // CircleInput
+ _min_max_g._input = _min_max_g._g.nodes()->create<luci::CircleInput>();
+ _min_max_g._input->index(graph_input->index());
+ _min_max_g._input->shape({N, H, W, C});
+ _min_max_g._input->dtype(loco::DataType::FLOAT32);
+ _min_max_g._input->name("input");
+
+ // CircleConst
+ _min_max_g._mini_const = _min_max_g._g.nodes()->create<luci::CircleConst>();
+ _min_max_g._mini_const->shape({}); // scalar
+ _min_max_g._mini_const->dtype(loco::DataType::FLOAT32);
+ _min_max_g._mini_const->size<loco::DataType::FLOAT32>(1);
+ _min_max_g._mini_const->at<loco::DataType::FLOAT32>(0) = 6.;
+ _min_max_g._mini_const->name("mini_const");
+
+ // CircleMinimum
+ _min_max_g._mini = _min_max_g._g.nodes()->create<luci::CircleMinimum>();
+ _min_max_g._mini->x(_min_max_g._input);
+ _min_max_g._mini->y(_min_max_g._mini_const);
+ _min_max_g._mini->shape({N, H, W, C});
+ _min_max_g._mini->dtype(loco::DataType::FLOAT32);
+ _min_max_g._mini->name("mini");
+
+ // CircleConst
+ _min_max_g._maxi_const = _min_max_g._g.nodes()->create<luci::CircleConst>();
+    _min_max_g._maxi_const->shape({}); // scalar
+ _min_max_g._maxi_const->dtype(loco::DataType::FLOAT32);
+ _min_max_g._maxi_const->size<loco::DataType::FLOAT32>(1);
+ _min_max_g._maxi_const->at<loco::DataType::FLOAT32>(0) = 0.;
+ _min_max_g._maxi_const->name("maxi_const");
+
+ // CircleMaximum
+ _min_max_g._maxi = _min_max_g._g.nodes()->create<luci::CircleMaximum>();
+ _min_max_g._maxi->x(_min_max_g._mini);
+ _min_max_g._maxi->y(_min_max_g._maxi_const);
+ _min_max_g._maxi->shape({N, H, W, C});
+ _min_max_g._maxi->dtype(loco::DataType::FLOAT32);
+ _min_max_g._maxi->name("maxi");
+
+ // CircleOutput
+ _min_max_g._output = _min_max_g._g.nodes()->create<luci::CircleOutput>();
+ _min_max_g._output->index(graph_output->index());
+ _min_max_g._output->from(_min_max_g._maxi);
+ _min_max_g._output->shape({N, H, W, C});
+ _min_max_g._output->dtype(loco::DataType::FLOAT32);
+ _min_max_g._output->name("output");
+ }
+
+protected:
+ luci::TransformMinMaxToRelu6Pass _pass;
+ MinMaxGraph _min_max_g;
+};
+
+} // namespace
+
+TEST_F(TransformMinMaxToRelu6PassTest, name)
+{
+ auto const name = _pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * Optimized graph looks like below.
+ *
+ * [CircleInput]
+ * |
+ * [CircleRelu6]
+ * |
+ * [CircleOutput]
+ */
+TEST_F(TransformMinMaxToRelu6PassTest, simple_test)
+{
+ auto ret = _pass.run(&_min_max_g._g);
+ EXPECT_TRUE(ret);
+
+ auto relu6 = dynamic_cast<luci::CircleRelu6 *>(_min_max_g._output->from());
+ EXPECT_NE(nullptr, relu6);
+
+ auto input = dynamic_cast<luci::CircleInput *>(relu6->features());
+ EXPECT_NE(nullptr, input);
+}
+
+TEST_F(TransformMinMaxToRelu6PassTest, wrong_condition_NEG)
+{
+ _min_max_g._maxi_const->at<loco::DataType::FLOAT32>(0) = 2.;
+
+ auto ret = _pass.run(&_min_max_g._g);
+
+ EXPECT_FALSE(ret);
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/TypeInferencePass.h"
-
-#include <luci/IR/CircleDialect.h>
-#include <luci/Service/CircleTypeInferenceRule.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/TypeInference.h>
-
-namespace luci
-{
-
-bool TypeInferencePass::run(luci::Module *m)
-{
- bool changed = false;
-
- for (size_t g = 0; g < m->size(); ++g)
- {
- if (run(m->graph(g)))
- changed = true;
- }
-
- return changed;
-}
-
-bool TypeInferencePass::run(loco::Graph *g)
-{
- loco::CanonicalTypeInferenceRule canonical_rule;
- luci::CircleTypeInferenceRule circle_rule;
-
- loco::MultiDialectTypeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(luci::CircleDialect::get(), &circle_rule);
-
- return loco::apply(&rules).to(g);
-}
-
-} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Pass/QuantizationParameters.h>
+
+using Granularity = luci::QuantizationGranularity;
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace luci
+{
+
+/**
+ * @brief Verify the granularity of channel-wise quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+struct VerifyQuantizedNodeChannelWiseGranularity final : public luci::CircleNodeVisitor<bool>
+{
+private:
+ bool is_lwq(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != 1)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != 1)
+ return false;
+
+ return true;
+ }
+
+ uint32_t rank(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ return circle_node->rank();
+ }
+
+ bool is_cwq_const(const loco::Node *node, uint32_t channel_dim)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
+
+ assert(channel_dim < circle_node->rank()); // FIX_CALLER_UNLESS
+ auto channel_size = circle_node->dim(channel_dim).value();
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->quantized_dimension != static_cast<int32_t>(channel_dim))
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != channel_size)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != channel_size)
+ return false;
+
+ return true;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleConcatenation *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ for (uint32_t i = 0; i < node->numValues(); i++)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
+ }
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthToSpace *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 3))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->gamma(), rank(node->gamma()) - 1))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->beta(), rank(node->beta()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CirclePad *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->alpha(), rank(node->alpha()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->weights(), 0))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleAdd *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleAveragePool2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleLogicalOr *)
+ {
+ // Logical OR has bool-type inputs and output
+ // Nothing to be checked
+ return true;
+ }
+
+ bool visit(const luci::CircleMaxPool2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleMean *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleMul *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleNotEqual *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CircleReshape *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->tensor()));
+ return true;
+ }
+
+ bool visit(const luci::CircleLogistic *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSoftmax *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->logits()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToBatchND *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToDepth *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSlice *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplit *node)
+ {
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitOut *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ return true;
+ }
+
+ bool visit(const luci::CircleStridedSlice *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleArgMax *node)
+ {
+ // node's output is index, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleBatchToSpaceND *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleTanh *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleTranspose *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->a()));
+ return true;
+ }
+
+ bool visit(const luci::CircleFloor *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGreater *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGreaterEqual *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleDiv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleFloorDiv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleRsqrt *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSqrt *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleElu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CirclePow *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleResizeBilinear *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ // TODO: Implement more Ops
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
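
For reference, the granularity rule the visitor enforces, stripped of the luci types: activations stay layer-wise (a single scale/zero-point pair), while constants such as a Conv2D filter of shape [32, 3, 3, 3] quantized along dimension 0 must carry 32 scales and 32 zero-points. The QParam struct in this sketch is a hypothetical stand-in for luci::CircleQuantParam; it is illustrative only.

#include <cstddef>
#include <cstdint>
#include <vector>

struct QParam // hypothetical stand-in for luci::CircleQuantParam
{
  std::vector<float> scale;
  std::vector<int64_t> zerop;
  int32_t quantized_dimension = 0;
};

// layer-wise (per-tensor): exactly one scale/zero-point pair
bool is_layer_wise(const QParam &qp) { return qp.scale.size() == 1 && qp.zerop.size() == 1; }

// channel-wise (per-channel): one pair per element of the quantized dimension
bool is_channel_wise(const QParam &qp, const std::vector<uint32_t> &shape, uint32_t channel_dim)
{
  const std::size_t channels = shape.at(channel_dim);
  return qp.quantized_dimension == static_cast<int32_t>(channel_dim) &&
         qp.scale.size() == channels && qp.zerop.size() == channels;
}
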
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Pass/QuantizationParameters.h>
+
+using Granularity = luci::QuantizationGranularity;
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace luci
+{
+
+/**
+ * @brief Verify the granularity of layer-wise quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+struct VerifyQuantizedNodeLayerWiseGranularity final : public luci::CircleNodeVisitor<bool>
+{
+private:
+ bool is_lwq(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != 1)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != 1)
+ return false;
+
+ return true;
+ }
+
+ bool is_lwq_const(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != 1)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != 1)
+ return false;
+
+ return true;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleConcatenation *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ for (uint32_t i = 0; i < node->numValues(); i++)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
+ }
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthToSpace *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->gamma()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->beta()))
+ return true;
+ }
+
+ bool visit(const luci::CirclePad *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->alpha()))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->weights()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleAdd *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleAveragePool2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleLogicalOr *)
+ {
+ // Logical OR has bool-type inputs and output
+ // Nothing to be checked
+ return true;
+ }
+
+ bool visit(const luci::CircleMaxPool2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleMean *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleMul *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleNotEqual *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CircleReshape *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->tensor()));
+ return true;
+ }
+
+ bool visit(const luci::CircleLogistic *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSoftmax *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->logits()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToBatchND *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToDepth *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSlice *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplit *node)
+ {
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitOut *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ return true;
+ }
+
+ bool visit(const luci::CircleStridedSlice *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleArgMax *node)
+ {
+ // node's output is index, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleBatchToSpaceND *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleTanh *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleTranspose *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->a()));
+ return true;
+ }
+
+ bool visit(const luci::CircleFloor *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGreater *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGreaterEqual *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleDiv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleFloorDiv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleRsqrt *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSqrt *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleElu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CirclePow *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleResizeBilinear *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ // TODO: Implement more Ops
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
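
A granularity verifier like the one above is usually driven by walking the active nodes and letting the visitor dispatch on the node type. The sketch below assumes the usual luci CircleNode::accept() entry point and that the header above is already included, so it may differ from how the quantizer actually wires it up.

#include <luci/IR/CircleNodes.h>
#include <loco.h>

// Sketch only (assumes CircleNode::accept() for CircleNodeVisitor<bool> and that
// VerifyQuantizedNodeLayerWiseGranularity from the header above is in scope).
inline bool verify_layer_wise_granularity(loco::Graph *g)
{
  luci::VerifyQuantizedNodeLayerWiseGranularity verifier;
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    if (not circle_node->accept(&verifier))
      return false; // granularity mismatch on this node
  }
  return true;
}
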
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+using Type = loco::DataType;
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace luci
+{
+
+/**
+ * @brief Verify the data type of INT16 quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+struct VerifyQuantizedNodeS16Type final : public luci::CircleNodeVisitor<bool>
+{
+private:
+ bool has_type(const loco::Node *node, Type dtype)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ return circle_node->dtype() == dtype;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleConcatenation *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ for (uint32_t i = 0; i < node->numValues(); i++)
+ {
+ RETURN_FALSE_UNLESS(has_type(node->values(i), Type::S16))
+ }
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthToSpace *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->gamma(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->beta(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CirclePad *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->alpha(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(has_type(bias, Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->weights(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleAdd *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleAveragePool2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleLogicalOr *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::BOOL))
+ return true;
+ }
+
+ bool visit(const luci::CircleMaxPool2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleMean *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleMul *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleNotEqual *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleRelu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->features(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleReshape *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->tensor(), Type::S16))
+ luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
+ if (shape != nullptr)
+ RETURN_FALSE_UNLESS(has_type(shape, Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleLogistic *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSoftmax *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->logits(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToBatchND *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToDepth *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSlice *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->begin(), Type::S32) || has_type(node->begin(), Type::S64))
+ RETURN_FALSE_UNLESS(has_type(node->size(), Type::S32) || has_type(node->size(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleSplit *node)
+ {
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitOut *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleStridedSlice *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleArgMax *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->dimension(), Type::S32) ||
+ has_type(node->dimension(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleBatchToSpaceND *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleTanh *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleTranspose *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->a(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->perm(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleFloor *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleGreater *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleGreaterEqual *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleDiv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleFloorDiv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleRsqrt *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSqrt *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleElu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->features(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CirclePow *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleResizeBilinear *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ // TODO: Implement more Ops
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+using Type = loco::DataType;
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace luci
+{
+
+/**
+ * @brief Verify the data type of UINT8 quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+struct VerifyQuantizedNodeU8Type final : public luci::CircleNodeVisitor<bool>
+{
+private:
+ bool has_type(const loco::Node *node, Type dtype)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ return circle_node->dtype() == dtype;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleConcatenation *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ for (uint32_t i = 0; i < node->numValues(); i++)
+ {
+ RETURN_FALSE_UNLESS(has_type(node->values(i), Type::U8))
+ }
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthToSpace *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->gamma(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->beta(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CirclePad *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->alpha(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(has_type(bias, Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->weights(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleAdd *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleAveragePool2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleBatchToSpaceND *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleLogicalOr *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::BOOL))
+ return true;
+ }
+
+ bool visit(const luci::CircleMaxPool2D *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleMean *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleMul *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleNotEqual *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleRelu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->features(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleReshape *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->tensor(), Type::U8))
+ luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
+ if (shape != nullptr)
+ RETURN_FALSE_UNLESS(has_type(shape, Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleLogistic *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSoftmax *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->logits(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToBatchND *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToDepth *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSlice *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->begin(), Type::S32) || has_type(node->begin(), Type::S64))
+ RETURN_FALSE_UNLESS(has_type(node->size(), Type::S32) || has_type(node->size(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleSplit *node)
+ {
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitOut *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleStridedSlice *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleArgMax *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->dimension(), Type::S32) ||
+ has_type(node->dimension(), Type::S64))
+ return true;
+ }
+
+ bool visit(const luci::CircleTanh *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleTranspose *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->a(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->perm(), Type::S32))
+ return true;
+ }
+
+ bool visit(const luci::CircleFloor *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleGreater *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleGreaterEqual *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleDiv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleFloorDiv *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleRsqrt *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSqrt *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleElu *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->features(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CirclePow *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleResizeBilinear *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ // TODO: Implement more Ops
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InferenceCandidates.h"
+
+#include <luci/IR/DeadNodeQueryService.h>
+
+#include <algorithm>
+
+namespace luci
+{
+
+std::vector<loco::Node *> inference_candidates(loco::Graph *g)
+{
+ auto candidates = loco::postorder_traversal(loco::output_nodes(g));
+
+ for (auto node : loco::all_nodes(g))
+ {
+ // already included as candidate
+ if (std::find(candidates.begin(), candidates.end(), node) != candidates.end())
+ continue;
+
+    // A node that is used neither for a graph output nor for a multiple-output operation
+    // (i.e., a dead node) cannot be a candidate.
+ if (node->dialect()->service<DeadNodeQueryServiceImpl>()->isDeadNode(node))
+ continue;
+
+ candidates.emplace_back(node);
+ }
+
+ return candidates;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INFERENCE_CANDIDATES_H__
+#define __LUCI_INFERENCE_CANDIDATES_H__
+
+#include <loco.h>
+
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * @brief Enumerate all the nodes whose shape/dtype should be inferred to export the graph.
+ */
+std::vector<loco::Node *> inference_candidates(loco::Graph *g);
+
+} // namespace luci
+
+#endif // __LUCI_INFERENCE_CANDIDATES_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InferenceCandidates.h"
+#include "luci/IR/CircleNode.h"
+
+#include <algorithm>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+bool contains(const std::vector<loco::Node *> &vec, loco::Node *val)
+{
+ return std::any_of(vec.begin(), vec.end(), [val](loco::Node *node) { return node == val; });
+}
+
+} // namespace
+
+TEST(LuciPassHelpersInferenceCandidates, inference_candidates)
+{
+ auto g = loco::make_graph();
+
+ // Create nodes
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto split = g->nodes()->create<luci::CircleSplit>();
+ auto split_out1 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_out2 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ auto output = g->nodes()->create<luci::CircleOutput>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ input->index(graph_input1->index());
+
+ split->split_dim(split_dim);
+ split->input(input);
+ split->num_split(2);
+
+ split_out1->input(split);
+ split_out1->index(0);
+
+ split_out2->input(split);
+ split_out2->index(1);
+
+ auto graph_output = g->outputs()->create();
+ output->from(split_out1);
+ output->index(graph_output->index());
+
+ auto s = luci::inference_candidates(g.get());
+
+ ASSERT_EQ(6, s.size());
+ ASSERT_TRUE(contains(s, input));
+ ASSERT_TRUE(contains(s, split));
+ ASSERT_TRUE(contains(s, split_out1));
+ ASSERT_TRUE(contains(s, split_out2));
+ ASSERT_TRUE(contains(s, split_dim));
+ ASSERT_TRUE(contains(s, output));
+}
+
+TEST(LuciPassHelpersInferenceCandidates, inference_candidates_NEG)
+{
+ auto g = loco::make_graph();
+
+ // Create nodes
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto split = g->nodes()->create<luci::CircleSplit>();
+ auto split_out1 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_out2 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ auto relu1 = g->nodes()->create<luci::CircleRelu>();
+ auto relu2 = g->nodes()->create<luci::CircleRelu>();
+ auto output = g->nodes()->create<luci::CircleOutput>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ input->index(graph_input1->index());
+
+ split->split_dim(split_dim);
+ split->input(input);
+ split->num_split(2);
+
+ split_out1->input(split);
+ split_out1->index(0);
+
+ split_out2->input(split);
+ split_out2->index(1);
+
+ relu1->features(split_out2);
+
+ relu2->features(input);
+
+ auto graph_output = g->outputs()->create();
+ output->from(split_out1);
+ output->index(graph_output->index());
+
+ auto s = luci::inference_candidates(g.get());
+
+ ASSERT_EQ(6, s.size());
+ ASSERT_TRUE(contains(s, input));
+ ASSERT_TRUE(contains(s, split));
+ ASSERT_TRUE(contains(s, split_out1));
+ ASSERT_TRUE(contains(s, split_out2));
+ ASSERT_TRUE(contains(s, split_dim));
+ ASSERT_TRUE(contains(s, output));
+ ASSERT_FALSE(contains(s, relu1));
+ ASSERT_FALSE(contains(s, relu2));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeFiller.h"
+
+// NOTE Do NOT delete this file; it forces the compiler to check whether 'NodeFiller.h' is
+// self-contained.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_NODE_FILLER_H__
+#define __LUCI_PASS_HELPERS_NODE_FILLER_H__
+
+namespace luci
+{
+
+/**
+ * INTRODUCTION
+ * Binary operation f(x,y) is 'commutative' when
+ * f(x,y) == f(y,x) holds for all x, y.
+ * For example, ADD, MUL and SQUARED_DIFFERENCE are commutative.
+ * These helpers make it easy to find the arguments of a commutative node by their types.
+ *
+ * HOW TO USE
+ * COMM_NODE *node;
+ * ARG_TYPE_1 *arg1;
+ * ARG_TYPE_2 *arg2;
+ *
+ * bool ok = fill(&arg1, &arg2).with_commutative_args_of(node);
+ *
+ * Result
+ *   If 'node's commutative argument types are actually {ARG_TYPE_1, ARG_TYPE_2}
+ *   (as a set), 'arg1' and 'arg2' are set to 'node's actual arguments with the
+ *   matching types, and the return value 'ok' is true.
+ *   Otherwise, 'arg1' and 'arg2' are not changed and 'ok' is false.
+ */
+
+template <class ARG_TYPE_1, class ARG_TYPE_2> class NodeFiller final
+{
+public:
+ NodeFiller(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2) : _arg_1(arg_1), _arg_2(arg_2)
+ {
+ // DO NOTHING
+ }
+
+ /**
+   * @return true When 'node's argument types are 'ARG_TYPE_1' and 'ARG_TYPE_2'
+   *         In such case, it assigns '_arg_1' and '_arg_2' to the actual arguments
+   *
+   * @return false When 'node's argument types do NOT match 'ARG_TYPE_*'
+   *         In such case, it does not modify '_arg_1' and '_arg_2'
+ *
+ * @require COMM_NODE has member x() and y()
+ */
+ template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);
+
+private:
+ ARG_TYPE_1 **_arg_1;
+ ARG_TYPE_2 **_arg_2;
+};
+
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+inline NodeFiller<ARG_TYPE_1, ARG_TYPE_2> fill(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2)
+{
+ return NodeFiller<ARG_TYPE_1, ARG_TYPE_2>{arg_1, arg_2};
+}
+
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+template <class COMM_NODE>
+bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NODE *node)
+{
+ // Case 1) X == ARG_TYPE_1 / Y == ARG_TYPE_2
+ {
+ auto x = dynamic_cast<ARG_TYPE_1 *>(node->x());
+ auto y = dynamic_cast<ARG_TYPE_2 *>(node->y());
+
+ if (x && y)
+ {
+ *_arg_1 = x;
+ *_arg_2 = y;
+ return true;
+ }
+ }
+
+ // Case 2) X == ARG_TYPE_2 / Y == ARG_TYPE_1
+ {
+ auto x = dynamic_cast<ARG_TYPE_2 *>(node->x());
+ auto y = dynamic_cast<ARG_TYPE_1 *>(node->y());
+
+ if (x && y)
+ {
+ *_arg_1 = y;
+ *_arg_2 = x;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_NODE_FILLER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+#include "NodeFiller.h"
+
+TEST(NodeFillerTest, simple_test)
+{
+ luci::CircleConst maxi_const;
+ luci::CircleMinimum mini;
+ luci::CircleMaximum maxi;
+ maxi.x(&maxi_const);
+ maxi.y(&mini);
+
+ luci::CircleConst *x = nullptr;
+ luci::CircleMinimum *y = nullptr;
+
+ EXPECT_TRUE(luci::fill(&x, &y).with_commutative_args_of(&maxi));
+ EXPECT_TRUE(x == &maxi_const);
+ EXPECT_TRUE(y == &mini);
+
+ x = nullptr;
+ y = nullptr;
+
+ EXPECT_TRUE(luci::fill(&y, &x).with_commutative_args_of(&maxi));
+ EXPECT_TRUE(x == &maxi_const);
+ EXPECT_TRUE(y == &mini);
+}
+
+TEST(NodeFillerTest, wrong_condition_NEG)
+{
+ luci::CircleConst add_const;
+ luci::CircleMinimum mini;
+ luci::CircleAdd add;
+ add.x(&add_const);
+ add.y(&mini);
+
+ luci::CircleMul *x = nullptr;
+ luci::CircleMinimum *y = nullptr;
+
+ EXPECT_FALSE(luci::fill(&x, &y).with_commutative_args_of(&add));
+ EXPECT_FALSE(luci::fill(&y, &x).with_commutative_args_of(&add));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Strings.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <stdexcept>
+
+namespace luci
+{
+
+bool in_array(const std::string &str, const std::vector<std::string> &array)
+{
+ return std::find(array.begin(), array.end(), str) != array.end();
+}
+
+std::string to_string(const std::vector<std::string> &strings)
+{
+ assert(!strings.empty());
+
+ std::string res;
+ for (unsigned int i = 0; i < strings.size() - 1; i++)
+ res += strings[i] + ", ";
+
+ res += strings[strings.size() - 1];
+ return res;
+}
+
+std::string to_lower_case(std::string s)
+{
+ std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
+ return s;
+}
+
+loco::DataType str_to_dtype(const std::string &str)
+{
+ if (to_lower_case(str).compare("uint8") == 0)
+ return loco::DataType::U8;
+ if (to_lower_case(str).compare("uint16") == 0)
+ return loco::DataType::U16;
+ if (to_lower_case(str).compare("uint32") == 0)
+ return loco::DataType::U32;
+ if (to_lower_case(str).compare("uint64") == 0)
+ return loco::DataType::U64;
+
+ if (to_lower_case(str).compare("int8") == 0)
+ return loco::DataType::S8;
+ if (to_lower_case(str).compare("int16") == 0)
+ return loco::DataType::S16;
+ if (to_lower_case(str).compare("int32") == 0)
+ return loco::DataType::S32;
+ if (to_lower_case(str).compare("int64") == 0)
+ return loco::DataType::S64;
+
+ if (to_lower_case(str).compare("float16") == 0)
+ return loco::DataType::FLOAT16;
+ if (to_lower_case(str).compare("float32") == 0)
+ return loco::DataType::FLOAT32;
+ if (to_lower_case(str).compare("float64") == 0)
+ return loco::DataType::FLOAT64;
+
+ if (to_lower_case(str).compare("bool") == 0)
+ return loco::DataType::BOOL;
+
+ return loco::DataType::Unknown;
+}
+
+QuantizationGranularity str_to_granularity(const std::string &str)
+{
+ if (to_lower_case(str).compare("layer") == 0)
+ return QuantizationGranularity::LayerWise;
+
+ if (to_lower_case(str).compare("channel") == 0)
+ return QuantizationGranularity::ChannelWise;
+
+ throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_STRINGS_H__
+#define __LUCI_PASS_HELPERS_STRINGS_H__
+
+#include "luci/Pass/QuantizationParameters.h"
+
+#include <loco.h>
+
+#include <vector>
+#include <sstream>
+#include <string>
+#include <cassert>
+
+namespace luci
+{
+
+bool in_array(const std::string &, const std::vector<std::string> &);
+
+std::string to_string(const std::vector<std::string> &);
+
+std::string to_lower_case(std::string);
+
+loco::DataType str_to_dtype(const std::string &);
+
+QuantizationGranularity str_to_granularity(const std::string &);
+
+template <typename T> std::vector<T> csv_to_vector(const std::string &str)
+{
+ std::vector<T> ret;
+ std::istringstream is(str);
+ for (T i; is >> i;)
+ {
+ assert(i != ',');
+ ret.push_back(i);
+ if (is.peek() == ',')
+ is.ignore();
+ }
+ return ret;
+}
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_STRINGS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Strings.h"
+
+#include "luci/Pass/QuantizationParameters.h"
+
+#include <gtest/gtest.h>
+
+TEST(StringsTest, str_to_dtype)
+{
+ ASSERT_EQ(loco::DataType::U8, luci::str_to_dtype("uint8"));
+ ASSERT_EQ(loco::DataType::U16, luci::str_to_dtype("uint16"));
+ ASSERT_EQ(loco::DataType::U32, luci::str_to_dtype("uint32"));
+ ASSERT_EQ(loco::DataType::U64, luci::str_to_dtype("uint64"));
+
+ ASSERT_EQ(loco::DataType::S8, luci::str_to_dtype("int8"));
+ ASSERT_EQ(loco::DataType::S16, luci::str_to_dtype("int16"));
+ ASSERT_EQ(loco::DataType::S32, luci::str_to_dtype("int32"));
+ ASSERT_EQ(loco::DataType::S64, luci::str_to_dtype("int64"));
+
+ ASSERT_EQ(loco::DataType::FLOAT16, luci::str_to_dtype("float16"));
+ ASSERT_EQ(loco::DataType::FLOAT32, luci::str_to_dtype("float32"));
+ ASSERT_EQ(loco::DataType::FLOAT64, luci::str_to_dtype("float64"));
+
+ ASSERT_EQ(loco::DataType::BOOL, luci::str_to_dtype("bool"));
+
+ ASSERT_EQ(loco::DataType::Unknown, luci::str_to_dtype("foo"));
+}
+
+TEST(StringsTest, str_to_granularity)
+{
+ ASSERT_EQ(luci::QuantizationGranularity::LayerWise, luci::str_to_granularity("layer"));
+ ASSERT_EQ(luci::QuantizationGranularity::ChannelWise, luci::str_to_granularity("channel"));
+
+ EXPECT_THROW(luci::str_to_granularity("foo"), std::runtime_error);
+}
+
+TEST(StringsTest, csv_to_vector_int32)
+{
+ auto ret = luci::csv_to_vector<int32_t>("1,2,3");
+ ASSERT_EQ(3, ret.size());
+ ASSERT_EQ(1, ret.at(0));
+ ASSERT_EQ(3, ret.at(2));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TypeMapper.h"
+
+// NOTE Do NOT delete this file; it forces the compiler to check whether 'TypeMapper.h' is
+// self-contained.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_TYPE_MAPPER_H__
+#define __LUCI_PASS_HELPERS_TYPE_MAPPER_H__
+
+#include <loco/IR/DataType.h>
+
+#include <cstdint>
+
+namespace luci
+{
+
+/**
+ * @brief TypeMapper maps between C++ primitive data types and loco::DataType.
+ */
+template <typename T> struct TypeMapper
+{
+ static constexpr loco::DataType get() { return loco::DataType::Unknown; }
+};
+
+template <> struct TypeMapper<float>
+{
+ static constexpr loco::DataType get() { return loco::DataType::FLOAT32; }
+};
+
+template <> struct TypeMapper<uint8_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U8; }
+};
+
+template <> struct TypeMapper<uint16_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U16; }
+};
+
+template <> struct TypeMapper<uint32_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U32; }
+};
+
+template <> struct TypeMapper<uint64_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U64; }
+};
+
+template <> struct TypeMapper<int8_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S8; }
+};
+
+template <> struct TypeMapper<int16_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S16; }
+};
+
+template <> struct TypeMapper<int32_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S32; }
+};
+
+template <> struct TypeMapper<int64_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S64; }
+};
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_TYPE_MAPPER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+#include "TypeMapper.h"
+
+#include <vector>
+
+namespace
+{
+
+template <typename T> bool fill_const_node(luci::CircleConst *node, std::vector<T> &data)
+{
+ if (node->dtype() != luci::TypeMapper<T>::get())
+ return false;
+
+ node->size<luci::TypeMapper<T>::get()>(data.size());
+ for (uint32_t i = 0; i < data.size(); i++)
+ {
+ node->at<luci::TypeMapper<T>::get()>(i) = data.at(i);
+ }
+
+ return true;
+}
+
+class STRANGER
+{
+};
+
+} // namespace
+
+TEST(TypeMapperTest, simple_test)
+{
+ EXPECT_EQ(loco::DataType::FLOAT32, luci::TypeMapper<float>::get());
+ EXPECT_EQ(loco::DataType::U8, luci::TypeMapper<uint8_t>::get());
+ EXPECT_EQ(loco::DataType::U16, luci::TypeMapper<uint16_t>::get());
+ EXPECT_EQ(loco::DataType::U32, luci::TypeMapper<uint32_t>::get());
+ EXPECT_EQ(loco::DataType::U64, luci::TypeMapper<uint64_t>::get());
+ EXPECT_EQ(loco::DataType::S8, luci::TypeMapper<int8_t>::get());
+ EXPECT_EQ(loco::DataType::S16, luci::TypeMapper<int16_t>::get());
+ EXPECT_EQ(loco::DataType::S32, luci::TypeMapper<int32_t>::get());
+ EXPECT_EQ(loco::DataType::S64, luci::TypeMapper<int64_t>::get());
+}
+
+TEST(TypeMapperTest, with_template_test)
+{
+ std::vector<int32_t> int32_vec{0, 1, 2, 3, 4, 5, 6, 7};
+ luci::CircleConst const_node;
+ const_node.dtype(loco::DataType::S32);
+ EXPECT_TRUE(fill_const_node(&const_node, int32_vec));
+ EXPECT_EQ(8, const_node.size<loco::DataType::S32>());
+ EXPECT_EQ(0, const_node.at<loco::DataType::S32>(0));
+ EXPECT_EQ(1, const_node.at<loco::DataType::S32>(1));
+ EXPECT_EQ(2, const_node.at<loco::DataType::S32>(2));
+ EXPECT_EQ(3, const_node.at<loco::DataType::S32>(3));
+ EXPECT_EQ(4, const_node.at<loco::DataType::S32>(4));
+ EXPECT_EQ(5, const_node.at<loco::DataType::S32>(5));
+ EXPECT_EQ(6, const_node.at<loco::DataType::S32>(6));
+ EXPECT_EQ(7, const_node.at<loco::DataType::S32>(7));
+
+ std::vector<float> f32_vec{0.0, 1.1, 2.2, 3.3, 4.4, 5.5};
+ const_node.dtype(loco::DataType::FLOAT32);
+ EXPECT_FALSE(fill_const_node(&const_node, int32_vec));
+ EXPECT_TRUE(fill_const_node(&const_node, f32_vec));
+ EXPECT_EQ(6, const_node.size<loco::DataType::FLOAT32>());
+ EXPECT_FLOAT_EQ(0.0, const_node.at<loco::DataType::FLOAT32>(0));
+ EXPECT_FLOAT_EQ(1.1, const_node.at<loco::DataType::FLOAT32>(1));
+ EXPECT_FLOAT_EQ(2.2, const_node.at<loco::DataType::FLOAT32>(2));
+ EXPECT_FLOAT_EQ(3.3, const_node.at<loco::DataType::FLOAT32>(3));
+ EXPECT_FLOAT_EQ(4.4, const_node.at<loco::DataType::FLOAT32>(4));
+ EXPECT_FLOAT_EQ(5.5, const_node.at<loco::DataType::FLOAT32>(5));
+}
+
+TEST(TypeMapperTest, wrong_condition_NEG)
+{
+ EXPECT_EQ(loco::DataType::Unknown, luci::TypeMapper<STRANGER>::get());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_FIRST_NODE_H__
+#define __LUCI_PASS_TEST_FIRST_NODE_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco.h>
+
+namespace luci
+{
+namespace test
+{
+
+template <class T> T *first_node(loco::Graph *g)
+{
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<T *>(node);
+ if (target_node != nullptr)
+ return target_node;
+ }
+ return nullptr;
+}
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_FIRST_NODE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestFirstNode.h"
+
+// This file validates "TestFirstNode.h". Please DO NOT remove this file.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_IO_GRAPH_H__
+#define __LUCI_PASS_TEST_IO_GRAPH_H__
+
+#include "TestShape.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+namespace test
+{
+
+/**
+ * @brief Graphlet with Inputs and loco::Graph for multiple inputs
+ * @note Every Graph will have Input(s) and Output(s)
+ * We put loco::Graph only in IsGraphlet not to declare separate
+ *       We put loco::Graph only in TestIsGraphlet to avoid declaring a
+ *       separate class for loco::Graph
+template <unsigned N> class TestIsGraphlet
+{
+public:
+ TestIsGraphlet()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_inputs[n] = nullptr;
+ _inputs[n] = nullptr;
+ }
+ }
+
+public:
+ virtual void init(loco::Graph *g, const ShapeU32 shape_in)
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_inputs[n] = g->inputs()->create();
+
+ _inputs[n] = g->nodes()->create<luci::CircleInput>();
+ _inputs[n]->shape(shape_in);
+ _inputs[n]->shape_status(luci::ShapeStatus::VALID);
+ _inputs[n]->dtype(loco::DataType::FLOAT32);
+ _inputs[n]->name("input_" + std::to_string(n));
+
+ _inputs[n]->index(_graph_inputs[n]->index());
+
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ set_shape_vector(input_shape.get(), shape_in);
+ _graph_inputs[n]->shape(std::move(input_shape));
+ _graph_inputs[n]->dtype(loco::DataType::FLOAT32);
+ }
+ }
+
+public:
+ loco::Graph *g(void) { return &_g; }
+ luci::CircleInput *input(int idx) { return _inputs[idx]; }
+
+protected:
+ loco::Graph _g;
+ std::array<loco::GraphInput *, N> _graph_inputs;
+ std::array<luci::CircleInput *, N> _inputs;
+};
+
+/**
+ * @brief Graphlet with one Input
+ */
+class TestIGraphlet : public TestIsGraphlet<1>
+{
+public:
+ luci::CircleInput *input() { return _inputs[0]; }
+};
+
+/**
+ * @brief Graphlet with Outputs for multiple outputs
+ */
+template <unsigned N> class TestOsGraphlet
+{
+public:
+ TestOsGraphlet()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_outputs[n] = nullptr;
+ _outputs[n] = nullptr;
+ }
+ }
+
+public:
+ virtual void init(loco::Graph *g, const ShapeU32 shape_out)
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_outputs[n] = g->outputs()->create();
+
+ _outputs[n] = g->nodes()->create<luci::CircleOutput>();
+ _outputs[n]->shape(shape_out);
+ _outputs[n]->shape_status(luci::ShapeStatus::VALID);
+ _outputs[n]->dtype(loco::DataType::FLOAT32);
+ _outputs[n]->name("output_" + std::to_string(n));
+
+ _outputs[n]->index(_graph_outputs[n]->index());
+
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ set_shape_vector(output_shape.get(), shape_out);
+ _graph_outputs[n]->shape(std::move(output_shape));
+ _graph_outputs[n]->dtype(loco::DataType::FLOAT32);
+ }
+ }
+
+public:
+ luci::CircleOutput *output(int idx) { return _outputs[idx]; }
+
+protected:
+ std::array<loco::GraphOutput *, N> _graph_outputs;
+ std::array<luci::CircleOutput *, N> _outputs;
+};
+
+/**
+ * @brief Graphlet with one Output
+ */
+class TestOGraphlet : public TestOsGraphlet<1>
+{
+public:
+ luci::CircleOutput *output() { return _outputs[0]; }
+};
+
+/**
+ * @brief Graph with Input and Output
+ */
+class TestIOGraph : public TestIGraphlet, public TestOGraphlet
+{
+public:
+ TestIOGraph() = default;
+
+public:
+ virtual void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIsGraphlet<1>::init(g(), shape_in);
+ TestOsGraphlet<1>::init(g(), shape_out);
+ }
+};
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_IO_GRAPH_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestIOGraph.h"
+
+// This file validates "TestIOGraph.h". Please DO NOT remove this file.
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_SHAPE_H__
+#define __LUCI_PASS_TEST_SHAPE_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <initializer_list>
+
+namespace luci
+{
+namespace test
+{
+
+using ShapeU32 = std::initializer_list<uint32_t>;
+using ShapeI32 = std::initializer_list<int32_t>;
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values);
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values);
+
+uint32_t num_elements(const ShapeU32 shape);
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_SHAPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestShape.h"
+
+/**
+ * @note This file does not hold any test cases but provides methods for tests
+ */
+
+namespace luci
+{
+namespace test
+{
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values)
+{
+ uint32_t r = 0;
+ shape->rank(values.size());
+ for (auto v : values)
+ shape->dim(r++).set(v);
+}
+
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values)
+{
+ const_node->rank(1);
+ const_node->dim(0).set(values.size());
+ const_node->shape_status(luci::ShapeStatus::VALID);
+ const_node->dtype(loco::DataType::S32);
+ const_node->size<loco::DataType::S32>(values.size());
+ uint32_t idx = 0;
+ for (auto val : values)
+ const_node->at<loco::DataType::S32>(idx++) = val;
+}
+
+uint32_t num_elements(const ShapeU32 shape)
+{
+ uint32_t result = 1;
+ for (auto val : shape)
+ result = result * val;
+ return result;
+}
+
+} // namespace test
+} // namespace luci
--- /dev/null
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(luci_profile SHARED ${SOURCES})
+target_include_directories(luci_profile PRIVATE src)
+target_include_directories(luci_profile PUBLIC include)
+target_link_libraries(luci_profile PUBLIC loco)
+target_link_libraries(luci_profile PUBLIC luci_lang)
+
+install(TARGETS luci_profile DESTINATION lib)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_profile_test ${TESTS})
+target_include_directories(luci_profile_test PRIVATE src)
+target_link_libraries(luci_profile_test luci_lang)
+target_link_libraries(luci_profile_test luci_profile)
--- /dev/null
+# luci-profile
+
+`luci-profile` provides profiling-related items.
+
+## CircleNodeOrigin
+
+`CircleNodeOrigin` lets us know where a node originated from.
+
+Let's assume the following graph transformations are done.
+
+```
+ | | |
+ [node1] --------+ | |
+(id = 1) | | |
+ | +--------> [node5] ----------------> [node6]
+ | | (origin = [1,2]) (origin = [1,2])
+ [node2] --------+ | |
+(id = 2) | |
+ | | |
+ [node3] -----------------> [node3] --------+-------> [node3]
+(id = 3) (origin = [3]) | (origin = [3,4])
+ | | | |
+ [node4] -----------------> [node4] --------+ |
+(id = 4) (origin = [4]) |
+ | | |
+
+<circle1> -- optimizer --> <circle2> -- quantizer --> <circle3>
+```
+
+The most important purpose of using `CircleNodeOrigin` is preserving origin information.
+The following changes show how origin information is preserved even after the graph is transformed.
+
+- `node3`
+  - `node4` is absorbed into the **existing** `node3`.
+  - The origin of `node4` is absorbed into the origin of `node3`.
+- `node5`
+  - `node1` and `node2` are fused into the **newly created** `node5`.
+  - The origins of `node1` and `node2` are inherited by the origin of `node5`.
+- `node6`
+  - `node5` is **replaced with the newly created** `node6`.
+  - The origin of `node5` is copied to the origin of `node6`.
+
+**Therefore, when using `CircleNodeOrigin`, please be aware of the most important principle: "Preserve origin information."**
+
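+As a concrete illustration, below is a minimal sketch of how a transformation could propagate origins with the `luci::get_origin`, `luci::composite_origin`, and `luci::add_origin` helpers introduced by this change. The function name `propagate_origins` and the `node*` variables are illustrative only and refer to the diagram above.
+
+```cpp
+#include <luci/Profile/CircleNodeOrigin.h>
+
+// Hypothetical fusion step: node1 and node2 have been fused into the newly
+// created node5, and node4 has been absorbed into the existing node3.
+void propagate_origins(luci::CircleNode *node1, luci::CircleNode *node2, luci::CircleNode *node3,
+                       luci::CircleNode *node4, luci::CircleNode *node5)
+{
+  // node5 inherits the union of node1's and node2's origins.
+  luci::add_origin(node5,
+                   luci::composite_origin({luci::get_origin(node1), luci::get_origin(node2)}));
+
+  // add_origin() merges node4's origin into node3's existing origin.
+  luci::add_origin(node3, luci::get_origin(node4));
+}
+```
+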
+The next items describe the implementation details used to store the origin information.
+
+### Source Table
+
+The source table contains the id and name of each origin node.
+
+#### Binary format
+
+```
+[ entry_number : uint32_t ]
+[ id : uint32_t ][ length : uint32_t ][ data : char * length ] * entry_number
+```
+- entry_number : The number of entries
+ - Each entry consists of id, length, and data.
+- id : ID of origin node
+- length : Length of data
+- data : Name of origin node **(null-terminated string)**
+
+#### In-memory format
+```cpp
+// size = entry_number
+std::map<uint32_t /* id */, std::string /* name */>
+```
+
+#### Example
+
+The following example means "The name of origin 1 is node1".
+
+```
+[Binary Format]
+ 0x01 00 00 00 0x01 00 00 00 0x06 00 00 00 0x6e 0x6f 0x64 0x65 0x31 00
+ ------------- ------------- ------------- ---- ---- ---- ---- ---- ----
+entry_number=1 id=1 length=6 'n' 'o' 'd' 'e' '1' '\0'
+```
+```cpp
+[In-memory Format]
+std::map<uint32_t, std::string>{{1, "node1"}};
+```
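+
+For reference, the sketch below shows how the binary layout above could be decoded into the in-memory format. This helper is not part of this change; the name `decode_source_table` is hypothetical, and a little-endian host is assumed, matching the byte order in the example.
+
+```cpp
+#include <cstdint>
+#include <cstring>
+#include <map>
+#include <string>
+
+std::map<uint32_t, std::string> decode_source_table(const uint8_t *buf)
+{
+  auto read_u32 = [&buf]() {
+    uint32_t v;
+    std::memcpy(&v, buf, sizeof(v)); // little-endian host assumed
+    buf += sizeof(v);
+    return v;
+  };
+
+  std::map<uint32_t, std::string> table;
+  const uint32_t entry_number = read_u32();
+  for (uint32_t i = 0; i < entry_number; ++i)
+  {
+    const uint32_t id = read_u32();
+    const uint32_t length = read_u32();
+    // 'length' counts the trailing '\0'; drop it when building the std::string value
+    table[id] = std::string(reinterpret_cast<const char *>(buf), length - 1);
+    buf += length;
+  }
+  return table;
+}
+```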
+
+### Op Table
+
+The op table maps the id of each operation to the id(s) of the operation's origin nodes.
+
+#### Binary format
+
+The op table is stored in the circle file as binary data with the following format.
+```
+[ entry_number : uint32_t ]
+[ id : uint32_t ][ node_num : uint32_t ][ node_ids : uint32_t * node_num ] * entry_number
+```
+- entry_number : The number of entries
+ - Each entry consists of id, node_num, and node_ids.
+- id : ID of operation in circle model file
+- node_num : The number of operation's origin nodes
+- node_ids : Set of IDs of origin nodes
+
+#### In-memory format
+```cpp
+std::map<uint32_t /* id */, std::set<uint32_t> /* node_ids */>
+```
+
+#### Example
+
+The following example means "Operation 5 originated from origin 1 and origin 2".
+
+```
+[Binary Format]
+ 0x01 00 00 00 0x05 00 00 00 0x02 00 00 00 0x01 00 00 00 0x02 00 00 00
+ ------------- ------------- ------------- ---------------------------
+entry_number=1 id=5 node_num=2 node_ids : 1, 2
+```
+```cpp
+[In-memory Format]
+std::map<uint32_t, std::set<uint32_t>>{{5, {1, 2}}};
+```
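+
+The same byte-reading approach applies to the op table. Again, this is only a sketch under the same assumptions (hypothetical name, little-endian host) and not part of this change.
+
+```cpp
+#include <cstdint>
+#include <cstring>
+#include <map>
+#include <set>
+
+std::map<uint32_t, std::set<uint32_t>> decode_op_table(const uint8_t *buf)
+{
+  auto read_u32 = [&buf]() {
+    uint32_t v;
+    std::memcpy(&v, buf, sizeof(v)); // little-endian host assumed
+    buf += sizeof(v);
+    return v;
+  };
+
+  std::map<uint32_t, std::set<uint32_t>> table;
+  const uint32_t entry_number = read_u32();
+  for (uint32_t i = 0; i < entry_number; ++i)
+  {
+    const uint32_t id = read_u32();
+    const uint32_t node_num = read_u32();
+    for (uint32_t n = 0; n < node_num; ++n)
+      table[id].insert(read_u32());
+  }
+  return table;
+}
+```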
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROFILE_CIRCLE_NODE_ID_H__
+#define __LUCI_PROFILE_CIRCLE_NODE_ID_H__
+
+#include <luci/IR/CircleNode.h>
+
+namespace luci
+{
+
+using CircleNodeID = uint32_t;
+
+bool has_node_id(const luci::CircleNode *circle_node);
+
+void set_node_id(luci::CircleNode *circle_node, CircleNodeID id);
+
+CircleNodeID get_node_id(const luci::CircleNode *circle_node);
+
+} // namespace luci
+
+#endif // __LUCI_PROFILE_CIRCLE_NODE_ID_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROFILE_CIRCLE_NODE_ORIGIN_H__
+#define __LUCI_PROFILE_CIRCLE_NODE_ORIGIN_H__
+
+#include "CircleNodeID.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <set>
+
+namespace luci
+{
+
+class CircleNodeOrigin
+{
+protected:
+ struct Source
+ {
+ public:
+ std::string name(void) const { return _name; }
+ void name(const std::string &name) { _name = name; }
+
+ uint32_t id(void) const { return _id; }
+ void id(const uint32_t id) { _id = id; }
+
+ private:
+ std::string _name;
+ uint32_t _id = 0;
+ };
+
+public:
+ virtual std::set<const Source *> sources(void) const = 0;
+};
+
+std::shared_ptr<CircleNodeOrigin> single_origin(uint32_t id, const std::string &name);
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::initializer_list<std::shared_ptr<CircleNodeOrigin>> origins);
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::vector<std::shared_ptr<CircleNodeOrigin>> &origins);
+
+} // namespace luci
+
+namespace luci
+{
+
+bool has_origin(const luci::CircleNode *circle_node);
+
+void add_origin(luci::CircleNode *circle_node, const std::shared_ptr<CircleNodeOrigin> origin);
+
+// NOTE When circle_node does not have origin, nullptr is returned
+const std::shared_ptr<luci::CircleNodeOrigin> get_origin(const luci::CircleNode *circle_node);
+
+} // namespace luci
+
+#endif // __LUCI_PROFILE_CIRCLE_NODE_ORIGIN_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeID.h"
+
+#include <loco.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+/**
+ * @brief Set annotation for circle node id
+ * @note Once CircleNodeID is annotated, it should not be changed.
+ *       If the CircleNodeID needs to be changed, create a new CircleNodeID.
+ */
+class CircleNodeIDAnnotation final : public loco::NodeAnnotation
+{
+public:
+ CircleNodeIDAnnotation() = delete;
+
+ CircleNodeIDAnnotation(luci::CircleNodeID node_id) : _node_id{node_id}
+ {
+ // Do nothing
+ }
+
+public:
+ luci::CircleNodeID node_id(void) const { return _node_id; }
+ // No setter
+
+private:
+ luci::CircleNodeID _node_id;
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool has_node_id(const luci::CircleNode *circle_node)
+{
+ return circle_node->annot<CircleNodeIDAnnotation>() != nullptr;
+}
+
+void set_node_id(luci::CircleNode *circle_node, luci::CircleNodeID id)
+{
+ circle_node->annot<CircleNodeIDAnnotation>(nullptr);
+ circle_node->annot(std::make_unique<CircleNodeIDAnnotation>(id));
+}
+
+luci::CircleNodeID get_node_id(const luci::CircleNode *circle_node)
+{
+ if (!has_node_id(circle_node))
+ throw std::runtime_error("Cannot find CircleNodeID");
+
+ return circle_node->annot<CircleNodeIDAnnotation>()->node_id();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeID.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+TEST(LuciCircleNodeID, simple_circle_node_id)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_node_id(add));
+
+ set_node_id(add, 3);
+
+ ASSERT_TRUE(has_node_id(add));
+ ASSERT_EQ(3, get_node_id(add));
+}
+
+TEST(LuciCircleNodeID, simple_circle_node_id_NEG)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_node_id(add));
+
+ ASSERT_ANY_THROW(get_node_id(add));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeOrigin.h"
+
+#include <loco.h>
+
+#include <cassert>
+#include <stdexcept>
+#include <vector>
+
+namespace
+{
+
+/**
+ * @brief Set annotation for recording origin information
+ * @note Once CircleNodeOrigin is annotated, it should not be changed.
+ *       If the CircleNodeOrigin needs to be changed, create a new CircleNodeOrigin.
+ */
+class CircleNodeOriginAnnotation final : public loco::NodeAnnotation
+{
+public:
+ CircleNodeOriginAnnotation() = delete;
+
+ CircleNodeOriginAnnotation(const std::shared_ptr<luci::CircleNodeOrigin> origin) : _origin(origin)
+ {
+ // Do nothing
+ }
+
+public:
+ const std::shared_ptr<luci::CircleNodeOrigin> origin(void) const { return _origin; }
+ // No setter
+
+private:
+ const std::shared_ptr<luci::CircleNodeOrigin> _origin;
+};
+
+} // namespace
+
+namespace
+{
+
+class SingleOrigin final : public luci::CircleNodeOrigin
+{
+public:
+ SingleOrigin() = delete;
+
+ SingleOrigin(uint32_t id, const std::string &name)
+ {
+ _source.id(id);
+ _source.name(name);
+ }
+
+public:
+ std::set<const Source *> sources(void) const final
+ {
+ std::set<const Source *> res;
+ res.emplace(&_source);
+ return res;
+ }
+
+private:
+ Source _source;
+};
+
+class CompositeOrigin final : public luci::CircleNodeOrigin
+{
+public:
+ CompositeOrigin() = delete;
+
+ template <typename T> CompositeOrigin(T origins)
+ {
+ if (origins.size() == 0)
+ throw std::invalid_argument("No origins provided");
+
+ for (auto &origin : origins)
+ {
+ if (origin != nullptr)
+ _origins.emplace_back(origin);
+ }
+ }
+
+public:
+ std::set<const Source *> sources(void) const final
+ {
+ std::set<const Source *> res;
+
+ for (auto &origin : _origins)
+ {
+ for (auto source : origin->sources())
+ {
+ res.emplace(source);
+ }
+ }
+
+ return res;
+ }
+
+private:
+ std::vector<std::shared_ptr<CircleNodeOrigin>> _origins;
+};
+
+} // namespace
+
+namespace luci
+{
+
+std::shared_ptr<CircleNodeOrigin> single_origin(uint32_t id, const std::string &name)
+{
+ return std::make_shared<SingleOrigin>(id, name);
+}
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::initializer_list<std::shared_ptr<CircleNodeOrigin>> origins)
+{
+ return std::make_shared<CompositeOrigin>(origins);
+}
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::vector<std::shared_ptr<CircleNodeOrigin>> &origins)
+{
+ return std::make_shared<CompositeOrigin>(origins);
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool has_origin(const luci::CircleNode *circle_node)
+{
+ return circle_node->annot<CircleNodeOriginAnnotation>() != nullptr;
+}
+
+/**
+ * @brief Add 'origin' to the existing origin of circle_node.
+ * @note  If 'origin' is nullptr, nothing is changed.
+ *        For more detail, refer to the CompositeOrigin constructor.
+ */
+void add_origin(luci::CircleNode *circle_node, const std::shared_ptr<CircleNodeOrigin> origin)
+{
+ auto new_origin = composite_origin({get_origin(circle_node), origin});
+ circle_node->annot<CircleNodeOriginAnnotation>(nullptr);
+ circle_node->annot(std::make_unique<CircleNodeOriginAnnotation>(new_origin));
+}
+
+const std::shared_ptr<luci::CircleNodeOrigin> get_origin(const luci::CircleNode *circle_node)
+{
+ if (!has_origin(circle_node))
+ return nullptr;
+
+ assert(circle_node->annot<CircleNodeOriginAnnotation>()->origin() != nullptr);
+ return circle_node->annot<CircleNodeOriginAnnotation>()->origin();
+}
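+
+// Usage sketch (illustrative; 'node' is a hypothetical luci::CircleNode*;
+// see the test file below for the same pattern):
+//
+//   luci::add_origin(node, luci::single_origin(3, "add"));
+//   luci::add_origin(node, luci::single_origin(7, "sub"));
+//   auto sources = luci::get_origin(node)->sources(); // holds both sources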
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeID.h"
+#include "luci/Profile/CircleNodeOrigin.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+TEST(LuciCircleNodeOrigin, simple_single_origin)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_origin(add));
+
+ auto origin = luci::single_origin(3, "add");
+ add_origin(add, origin);
+
+ ASSERT_TRUE(has_origin(add));
+
+ auto sources = get_origin(add)->sources();
+ ASSERT_EQ(1, sources.size());
+ for (auto source : sources)
+ {
+ ASSERT_EQ(3, source->id());
+ ASSERT_EQ(0, source->name().compare("add"));
+ }
+}
+
+TEST(LuciCircleNodeOrigin, simple_composite_origin_with_initializer)
+{
+ auto g = loco::make_graph();
+ auto mul = g->nodes()->create<luci::CircleMul>();
+
+ ASSERT_FALSE(has_origin(mul));
+
+ auto origin =
+ luci::composite_origin({luci::single_origin(3, "add"), luci::single_origin(7, "sub")});
+ add_origin(mul, origin);
+
+ ASSERT_TRUE(has_origin(mul));
+
+ bool add_origin_passed = false;
+ bool sub_origin_passed = false;
+ auto sources = get_origin(mul)->sources();
+ ASSERT_EQ(2, sources.size());
+ for (auto source : sources)
+ {
+ if (source->id() == 3 && source->name().compare("add") == 0)
+ add_origin_passed = true;
+ if (source->id() == 7 && source->name().compare("sub") == 0)
+ sub_origin_passed = true;
+ }
+
+ ASSERT_EQ(true, add_origin_passed);
+ ASSERT_EQ(true, sub_origin_passed);
+}
+
+TEST(LuciCircleNodeOrigin, simple_composite_origin_with_vector)
+{
+ auto g = loco::make_graph();
+ auto mul = g->nodes()->create<luci::CircleMul>();
+
+ ASSERT_FALSE(has_origin(mul));
+
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> vec;
+ vec.push_back(luci::single_origin(3, "add"));
+ vec.push_back(luci::single_origin(7, "sub"));
+ auto origin = luci::composite_origin(vec);
+ add_origin(mul, origin);
+
+ ASSERT_TRUE(has_origin(mul));
+
+ bool add_origin_passed = false;
+ bool sub_origin_passed = false;
+ auto sources = get_origin(mul)->sources();
+ ASSERT_EQ(2, sources.size());
+ for (auto source : sources)
+ {
+ if (source->id() == 3 && source->name().compare("add") == 0)
+ add_origin_passed = true;
+ if (source->id() == 7 && source->name().compare("sub") == 0)
+ sub_origin_passed = true;
+ }
+
+ ASSERT_EQ(true, add_origin_passed);
+ ASSERT_EQ(true, sub_origin_passed);
+}
+
+TEST(LuciCircleNodeOrigin, composite_origin_empty_ctor_NEG)
+{
+ ASSERT_ANY_THROW(luci::composite_origin({}));
+}
GTest_AddTest(luci_service_test ${TESTS})
target_include_directories(luci_service_test PRIVATE src)
target_link_libraries(luci_service_test luci_service)
+target_link_libraries(luci_service_test luci_testhelper)
target_link_libraries(luci_service_test oops)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_NODE_CLONE__
+#define __LUCI_CIRCLE_NODE_CLONE__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco/IR/Graph.h>
+
+namespace luci
+{
+
+/**
+ * @brief Copy common attributes of CircleNode from src to dst.
+ */
+void copy_common_attributes(const luci::CircleNode *src, luci::CircleNode *dst);
+
+/**
+ * @brief Create a new CircleNode in 'graph', cloned from 'node' with the same
+ *        common attribute values.
+ * @note  Returns nullptr if cloning fails.
+ */
+CircleNode *clone_node(const CircleNode *node, loco::Graph *graph);
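+
+// Usage sketch (illustrative only; 'src_node' and 'dst_graph' are hypothetical
+// names, not part of this header):
+//
+//   luci::CircleNode *cloned = luci::clone_node(src_node, dst_graph);
+//   if (cloned == nullptr)
+//   {
+//     // cloning failed (unsupported node type or null arguments)
+//   }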
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_NODE_CLONE__
#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_H__
#define __LUCI_CIRCLE_SHAPE_INFERENCE_H__
-#include "ShapeDescription.h"
-
#include <loco/IR/Nodes.h>
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleShapeInferenceHelper.h>
+#include <luci/Service/CircleShapeInferenceRule.h>
namespace luci
{
-/**
- * @brief Get the shape of each node as a node annotation
- *
- * HOW TO USE
- *
- * ShapeInference::get(g->nodes()->at(..));
- */
-struct ShapeInference
-{
- static ShapeDescription get(loco::Node *node);
-};
-
namespace sinf // namespace for Shape Inference
{
{
public:
  // TODO Remove this when all the visit functions are implemented
- loco::TensorShape visit(const luci::CircleNode *node) final { return sinf::circle_shape(node); }
+ loco::TensorShape visit(const luci::CircleNode *node) final
+ {
+ loco::NodeShape shape;
+ luci::CircleShapeInferenceRule().infer(node, shape);
+ return shape.as<loco::TensorShape>();
+ }
// loco::TensorShape visit(const luci::CircleAbs *node) final;
// loco::TensorShape visit(const luci::CircleAdd *node) final;
// loco::TensorShape visit(const luci::CircleEqual *node) final;
// loco::TensorShape visit(const luci::CircleExp *node) final;
// loco::TensorShape visit(const luci::CircleExpandDims *node) final;
+ // loco::TensorShape visit(const luci::CircleFakeQuant *node) final;
// loco::TensorShape visit(const luci::CircleFill *node) final;
// loco::TensorShape visit(const luci::CircleFloor *node) final;
// loco::TensorShape visit(const luci::CircleFloorDiv *node) final;
// loco::TensorShape visit(const luci::CircleMean *node) final;
// loco::TensorShape visit(const luci::CircleMinimum *node) final;
// loco::TensorShape visit(const luci::CircleMirrorPad *node) final;
+ // loco::TensorShape visit(const luci::CircleMul *node) final;
// loco::TensorShape visit(const luci::CircleNeg *node) final;
// loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4 *node) final;
// loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5 *node) final;
// loco::TensorShape visit(const luci::CircleNotEqual *node) final;
+ // loco::TensorShape visit(const luci::CircleOneHot *node) final;
// loco::TensorShape visit(const luci::CirclePack *node) final;
// loco::TensorShape visit(const luci::CirclePad *node) final;
// loco::TensorShape visit(const luci::CirclePadV2 *node) final;
// loco::TensorShape visit(const luci::CirclePRelu *node) final;
// loco::TensorShape visit(const luci::CircleRange *node) final;
// loco::TensorShape visit(const luci::CircleRank *node) final;
- // loco::TensorShape visit(const luci::CircleMul *node) final;
- // loco::TensorShape visit(const luci::CircleOneHot *node) final;
// loco::TensorShape visit(const luci::CircleReduceAny *node) final;
// loco::TensorShape visit(const luci::CircleReduceMax *node) final;
// loco::TensorShape visit(const luci::CircleReduceMin *node) final;
// loco::TensorShape visit(const luci::CircleInstanceNorm *node) final;
// Virtual
+ // loco::TensorShape visit(const luci::CircleCustomOut *node) final;
+ loco::TensorShape visit(const luci::CircleIfOut *node) final;
// loco::TensorShape visit(const luci::CircleInput *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
// loco::TensorShape visit(const luci::CircleOutput *node) final;
// loco::TensorShape visit(const luci::CircleOutputDummy *node) final;
// loco::TensorShape visit(const luci::CircleOutputExclude *node) final;
- // loco::TensorShape visit(const luci::CircleCustomOut *node) final;
- // loco::TensorShape visit(const luci::CircleIfOut *node) final;
- // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
- // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
// loco::TensorShape visit(const luci::CircleSplitOut *node) final;
// loco::TensorShape visit(const luci::CircleSplitVOut *node) final;
// loco::TensorShape visit(const luci::CircleTopKV2Out *node) final;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
-#define __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
-
-#include <loco/IR/TensorShape.h>
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleShapeSignature.h>
-
-namespace luci
-{
-namespace sinf // Namespace for Shape Inference
-{
-
-// Return shape of circle node as loco::TensorShape
-loco::TensorShape circle_shape(const luci::CircleNode *node);
-
-} // namespace sinf
-} // namespace luci
-
-#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__
-#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/IR/CircleShapeSignature.h>
-#include <luci/Service/CircleShapeSignatureInferenceHelper.h>
-
-namespace luci
-{
-
-namespace ssinf // namespace for Shape Signature Inference
-{
-
-struct Rule
-{
- bool infer(const luci::CircleNode *, ShapeSignature &) const;
-};
-
-class Algorithm final : public luci::CircleNodeVisitor<ShapeSignature>
-{
-public:
- // TODO Remove this when visit function is implemented for all the operations.
- ShapeSignature visit(const luci::CircleNode *node) final { return node->shape_signature(); }
-
- // ShapeSignature visit(const luci::CircleAbs *node) final;
- // ShapeSignature visit(const luci::CircleAdd *node) final;
- // ShapeSignature visit(const luci::CircleAddN *node) final;
- // ShapeSignature visit(const luci::CircleArgMax *node) final;
- // ShapeSignature visit(const luci::CircleArgMin *node) final;
- // ShapeSignature visit(const luci::CircleAveragePool2D *node) final;
- // ShapeSignature visit(const luci::CircleBatchMatMul *node) final;
- // ShapeSignature visit(const luci::CircleBatchToSpaceND *node) final;
- // ShapeSignature visit(const luci::CircleCast *node) final;
- // ShapeSignature visit(const luci::CircleCeil *node) final;
- // ShapeSignature visit(const luci::CircleConcatenation *node) final;
- // ShapeSignature visit(const luci::CircleConst *node) final;
- // ShapeSignature visit(const luci::CircleConv2D *node) final;
- // ShapeSignature visit(const luci::CircleCos *node) final;
- // ShapeSignature visit(const luci::CircleCustom *node) final;
- // ShapeSignature visit(const luci::CircleDepthToSpace *node) final;
- // ShapeSignature visit(const luci::CircleDepthwiseConv2D *node) final;
- // ShapeSignature visit(const luci::CircleDequantize *node) final;
- // ShapeSignature visit(const luci::CircleDiv *node) final;
- // ShapeSignature visit(const luci::CircleElu *node) final;
- // ShapeSignature visit(const luci::CircleEqual *node) final;
- // ShapeSignature visit(const luci::CircleExp *node) final;
- // ShapeSignature visit(const luci::CircleExpandDims *node) final;
- // ShapeSignature visit(const luci::CircleFill *node) final;
- // ShapeSignature visit(const luci::CircleFloor *node) final;
- // ShapeSignature visit(const luci::CircleFloorDiv *node) final;
- // ShapeSignature visit(const luci::CircleFloorMod *node) final;
- // ShapeSignature visit(const luci::CircleFullyConnected *node) final;
- // ShapeSignature visit(const luci::CircleGather *node) final;
- // ShapeSignature visit(const luci::CircleGatherNd *node) final;
- // ShapeSignature visit(const luci::CircleGreater *node) final;
- // ShapeSignature visit(const luci::CircleGreaterEqual *node) final;
- // ShapeSignature visit(const luci::CircleIf *node) final;
- // ShapeSignature visit(const luci::CircleL2Normalize *node) final;
- // ShapeSignature visit(const luci::CircleL2Pool2D *node) final;
- // ShapeSignature visit(const luci::CircleLeakyRelu *node) final;
- // ShapeSignature visit(const luci::CircleLess *node) final;
- // ShapeSignature visit(const luci::CircleLessEqual *node) final;
- // ShapeSignature visit(const luci::CircleLocalResponseNormalization *node) final;
- // ShapeSignature visit(const luci::CircleLog *node) final;
- // ShapeSignature visit(const luci::CircleLogicalAnd *node) final;
- // ShapeSignature visit(const luci::CircleLogicalNot *node) final;
- // ShapeSignature visit(const luci::CircleLogicalOr *node) final;
- // ShapeSignature visit(const luci::CircleLogistic *node) final;
- // ShapeSignature visit(const luci::CircleLogSoftmax *node) final;
- // ShapeSignature visit(const luci::CircleMatrixDiag *node) final;
- // ShapeSignature visit(const luci::CircleMatrixSetDiag *node) final;
- // ShapeSignature visit(const luci::CircleMaximum *node) final;
- // ShapeSignature visit(const luci::CircleMaxPool2D *node) final;
- ShapeSignature visit(const luci::CircleMean *node) final;
- // ShapeSignature visit(const luci::CircleMinimum *node) final;
- // ShapeSignature visit(const luci::CircleMirrorPad *node) final;
- // ShapeSignature visit(const luci::CircleNeg *node) final;
- // ShapeSignature visit(const luci::CircleNonMaxSuppressionV4 *node) final;
- // ShapeSignature visit(const luci::CircleNonMaxSuppressionV5 *node) final;
- // ShapeSignature visit(const luci::CircleNotEqual *node) final;
- // ShapeSignature visit(const luci::CirclePack *node) final;
- // ShapeSignature visit(const luci::CirclePad *node) final;
- // ShapeSignature visit(const luci::CirclePadV2 *node) final;
- // ShapeSignature visit(const luci::CirclePow *node) final;
- // ShapeSignature visit(const luci::CirclePRelu *node) final;
- // ShapeSignature visit(const luci::CircleRange *node) final;
- // ShapeSignature visit(const luci::CircleRank *node) final;
- // ShapeSignature visit(const luci::CircleMul *node) final;
- // ShapeSignature visit(const luci::CircleOneHot *node) final;
- ShapeSignature visit(const luci::CircleReduceAny *node) final;
- ShapeSignature visit(const luci::CircleReduceMax *node) final;
- ShapeSignature visit(const luci::CircleReduceMin *node) final;
- ShapeSignature visit(const luci::CircleReduceProd *node) final;
- ShapeSignature visit(const luci::CircleRelu *node) final;
- ShapeSignature visit(const luci::CircleRelu6 *node) final;
- ShapeSignature visit(const luci::CircleReluN1To1 *node) final;
- // ShapeSignature visit(const luci::CircleReshape *node) final;
- // ShapeSignature visit(const luci::CircleResizeBilinear *node) final;
- // ShapeSignature visit(const luci::CircleResizeNearestNeighbor *node) final;
- // ShapeSignature visit(const luci::CircleReverseSequence *node) final;
- // ShapeSignature visit(const luci::CircleReverseV2 *node) final;
- // ShapeSignature visit(const luci::CircleRound *node) final;
- // ShapeSignature visit(const luci::CircleRsqrt *node) final;
- // ShapeSignature visit(const luci::CircleScatterNd *node) final;
- // ShapeSignature visit(const luci::CircleSegmentSum *node) final;
- // ShapeSignature visit(const luci::CircleSelect *node) final;
- // ShapeSignature visit(const luci::CircleSelectV2 *node) final;
- // ShapeSignature visit(const luci::CircleShape *node) final;
- // ShapeSignature visit(const luci::CircleSin *node) final;
- // ShapeSignature visit(const luci::CircleSlice *node) final;
- // ShapeSignature visit(const luci::CircleSoftmax *node) final;
- // ShapeSignature visit(const luci::CircleSpaceToBatchND *node) final;
- // ShapeSignature visit(const luci::CircleSpaceToDepth *node) final;
- // ShapeSignature visit(const luci::CircleSparseToDense *node) final;
- // ShapeSignature visit(const luci::CircleSplit *node) final;
- // ShapeSignature visit(const luci::CircleSplitV *node) final;
- // ShapeSignature visit(const luci::CircleSqrt *node) final;
- // ShapeSignature visit(const luci::CircleSquare *node) final;
- // ShapeSignature visit(const luci::CircleSquaredDifference *node) final;
- // ShapeSignature visit(const luci::CircleSqueeze *node) final;
- // ShapeSignature visit(const luci::CircleStridedSlice *node) final;
- // ShapeSignature visit(const luci::CircleSub *node) final;
- ShapeSignature visit(const luci::CircleSum *node) final;
- // ShapeSignature visit(const luci::CircleTanh *node) final;
- // ShapeSignature visit(const luci::CircleTile *node) final;
- // ShapeSignature visit(const luci::CircleTopKV2 *node) final;
- // ShapeSignature visit(const luci::CircleTranspose *node) final;
- // ShapeSignature visit(const luci::CircleTransposeConv *node) final;
- // ShapeSignature visit(const luci::CircleUnidirectionalSequenceLSTM *node) final;
- // ShapeSignature visit(const luci::CircleUnique *node) final;
- // ShapeSignature visit(const luci::CircleUnpack *node) final;
- // ShapeSignature visit(const luci::CircleWhere *node) final ;
- // ShapeSignature visit(const luci::CircleWhile *node) final;
- // ShapeSignature visit(const luci::CircleZerosLike *node) final;
-
- // Circle Only
- // ShapeSignature visit(const luci::CircleBCQFullyConnected *node) final;
- // ShapeSignature visit(const luci::CircleBCQGather *node) final;
- // ShapeSignature visit(const luci::CircleInstanceNorm *node) final;
-
- // Virtual
- ShapeSignature visit(const luci::CircleInput *node) final;
- ShapeSignature visit(const luci::CircleOutput *node) final;
- ShapeSignature visit(const luci::CircleOutputDummy *node) final;
- ShapeSignature visit(const luci::CircleOutputExclude *node) final;
- // ShapeSignature visit(const luci::CircleCustomOut *node) final;
- // ShapeSignature visit(const luci::CircleIfOut *node) final;
- // ShapeSignature visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
- // ShapeSignature visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
- // ShapeSignature visit(const luci::CircleSplitOut *node) final;
- // ShapeSignature visit(const luci::CircleSplitVOut *node) final;
- // ShapeSignature visit(const luci::CircleTopKV2Out *node) final;
- // ShapeSignature visit(const luci::CircleUniqueOut *node) final;
- // ShapeSignature visit(const luci::CircleUnpackOut *node) final;
- // ShapeSignature visit(const luci::CircleWhileOut *node) final;
-};
-
-} // namespace ssinf
-
-} // namespace luci
-
-#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__
-#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleShapeSignature.h>
-
-namespace luci
-{
-
-namespace ssinf // Namespace for Shape Signature Inference
-{
-
-// Return empty signature if all of dimensions are known.
-// If at least one of dimensions is unknown, return signature without change.
-ShapeSignature legalized_signature(const luci::ShapeSignature &signature);
-
-// Return reduced input_signature with indices and keep_dims.
-// - indices : reduction index
-// - keep_dims : If true, rank is not changed. If false, rank is reduced along indices.
-ShapeSignature reduced_signature(const loco::Node *node, const loco::Node *indices, bool keep_dims);
-
-// Return signature of index-th argument of node.
-ShapeSignature input_arg_signature(const luci::CircleNode *node, uint32_t index);
-
-} // namespace ssinf
-
-} // namespace luci
-
-#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleTypeInferenceHelper.h>
+#include <luci/Service/CircleTypeInferenceRule.h>
namespace luci
{
-/**
- * @brief Get the type of each node as NodeAnnotation
- *
- * HOW TO USE
- *
- * TypeInference::get(g->nodes()->at(0));
- * TypeInference::get(g->nodes()->at(...));
- */
-struct TypeInference
-{
- static circle::TensorType get(loco::Node *node);
-};
-
namespace tinf // namespace for Type Inference
{
{
public:
  // TODO Remove this when all the visit functions are implemented
- loco::DataType visit(const luci::CircleNode *node) final { return node->dtype(); }
+ loco::DataType visit(const luci::CircleNode *node) final
+ {
+ loco::DataType dtype;
+ luci::CircleTypeInferenceRule().infer(node, dtype);
+ return dtype;
+ }
// loco::DataType visit(const luci::CircleAbs *node) final;
// loco::DataType visit(const luci::CircleAdd *node) final;
// loco::DataType visit(const luci::CircleEqual *node) final;
// loco::DataType visit(const luci::CircleExp *node) final;
// loco::DataType visit(const luci::CircleExpandDims *node) final;
+ // loco::DataType visit(const luci::CircleFakeQuant *node) final;
// loco::DataType visit(const luci::CircleFill *node) final;
// loco::DataType visit(const luci::CircleFloor *node) final;
// loco::DataType visit(const luci::CircleFloorDiv *node) final;
// loco::DataType visit(const luci::CircleOutputDummy *node) final;
// loco::DataType visit(const luci::CircleOutputExclude *node) final;
// loco::DataType visit(const luci::CircleCustomOut *node) final;
- // loco::DataType visit(const luci::CircleIfOut *node) final;
+ loco::DataType visit(const luci::CircleIfOut *node) final;
// loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
// loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
// loco::DataType visit(const luci::CircleSplitOut *node) final;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
-#define __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
-
-#include <luci/IR/CircleNodes.h>
-
-#include <loco/IR/DataType.h>
-
-namespace luci
-{
-namespace tinf // Namespace for Type Inference
-{
-
-// Helper function will be added
-
-} // namespace tinf
-} // namespace luci
-
-#endif // __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SERVICE_CIRCLE_CONST_H__
+#define __LUCI_SERVICE_CIRCLE_CONST_H__
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci
+{
+
+/**
+ * @brief Return cloned object of CircleConst node
+ */
+luci::CircleConst *clone(luci::CircleConst *node);
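+
+// Usage sketch (illustrative; 'weights' is a hypothetical CircleConst pointer):
+//
+//   luci::CircleConst *copy = luci::clone(weights);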
+
+} // namespace luci
+
+#endif // __LUCI_SERVICE_CIRCLE_CONST_H__
// TODO remove these when CircleDialect is fully functional
ShapeDescription to_shape_description(const luci::CircleNode *node);
ShapeDescription to_shape_description(const loco::TensorShape &shape);
-ShapeDescription to_shape_description(const loco::FeatureShape &shape);
-ShapeDescription to_shape_description(const loco::FilterShape &shape);
-ShapeDescription to_shape_description(const loco::BiasShape &shape);
-ShapeDescription to_shape_description(const loco::MatrixShape &shape);
ShapeDescription to_shape_description(const loco::NodeShape &shape);
template <typename Permutation> inline bool isNHWC(Permutation *perm);
#ifndef __LUCI_SERVICE_VALIDATE_H__
#define __LUCI_SERVICE_VALIDATE_H__
+#include <luci/IR/Module.h>
+
#include <loco.h>
namespace luci
bool validate(loco::Graph *);
+/**
+ * @brief Return true if all nodes in the graph have a non-empty name
+ */
+bool validate_name(loco::Graph *);
+
+/**
+ * @brief Return true if all names in the Module are unique
+ * @note CircleOutput nodes may have duplicate names
+ */
+bool validate_unique_name(luci::Module *);
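+
+// Usage sketch (illustrative; 'graph' and 'module' are hypothetical pointers):
+//
+//   if (!luci::validate_name(graph))
+//   {
+//     // some node has an empty name
+//   }
+//   if (!luci::validate_unique_name(module))
+//   {
+//     // duplicated node names exist in the module
+//   }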
+
} // namespace luci
#endif // __LUCI_SERVICE_VALIDATE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_CLONE_NODE_H__
+#define __CIRCLE_CLONE_NODE_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+class CloneNode final : public luci::CircleNodeVisitor<luci::CircleNode *>
+{
+public:
+  CloneNode(loco::Graph *graph) : _graph(graph) {}
+
+public:
+ luci::CircleNode *visit(const luci::CircleAbs *) final;
+ luci::CircleNode *visit(const luci::CircleAdd *) final;
+ luci::CircleNode *visit(const luci::CircleAddN *) final;
+ luci::CircleNode *visit(const luci::CircleArgMax *) final;
+ luci::CircleNode *visit(const luci::CircleArgMin *) final;
+ luci::CircleNode *visit(const luci::CircleAveragePool2D *) final;
+ luci::CircleNode *visit(const luci::CircleBatchMatMul *) final;
+ luci::CircleNode *visit(const luci::CircleBatchToSpaceND *) final;
+ luci::CircleNode *visit(const luci::CircleCast *) final;
+ luci::CircleNode *visit(const luci::CircleCeil *) final;
+ luci::CircleNode *visit(const luci::CircleConcatenation *) final;
+ luci::CircleNode *visit(const luci::CircleConst *) final;
+ luci::CircleNode *visit(const luci::CircleConv2D *) final;
+ luci::CircleNode *visit(const luci::CircleCos *) final;
+ luci::CircleNode *visit(const luci::CircleCustom *) final;
+ luci::CircleNode *visit(const luci::CircleDepthToSpace *) final;
+ luci::CircleNode *visit(const luci::CircleDepthwiseConv2D *) final;
+ luci::CircleNode *visit(const luci::CircleDequantize *) final;
+ luci::CircleNode *visit(const luci::CircleDiv *) final;
+ luci::CircleNode *visit(const luci::CircleElu *) final;
+ luci::CircleNode *visit(const luci::CircleEqual *) final;
+ luci::CircleNode *visit(const luci::CircleExp *) final;
+ luci::CircleNode *visit(const luci::CircleExpandDims *) final;
+ luci::CircleNode *visit(const luci::CircleFakeQuant *) final;
+ luci::CircleNode *visit(const luci::CircleFill *) final;
+ luci::CircleNode *visit(const luci::CircleFloor *) final;
+ luci::CircleNode *visit(const luci::CircleFloorDiv *) final;
+ luci::CircleNode *visit(const luci::CircleFloorMod *) final;
+ luci::CircleNode *visit(const luci::CircleFullyConnected *) final;
+ luci::CircleNode *visit(const luci::CircleGather *) final;
+ luci::CircleNode *visit(const luci::CircleGatherNd *) final;
+ luci::CircleNode *visit(const luci::CircleGreater *) final;
+ luci::CircleNode *visit(const luci::CircleGreaterEqual *) final;
+ // luci::CircleNode *visit(const luci::CircleIf *) final;
+ luci::CircleNode *visit(const luci::CircleL2Normalize *) final;
+ luci::CircleNode *visit(const luci::CircleL2Pool2D *) final;
+ luci::CircleNode *visit(const luci::CircleLeakyRelu *) final;
+ luci::CircleNode *visit(const luci::CircleLess *) final;
+ luci::CircleNode *visit(const luci::CircleLessEqual *) final;
+ luci::CircleNode *visit(const luci::CircleLocalResponseNormalization *) final;
+ luci::CircleNode *visit(const luci::CircleLog *) final;
+ luci::CircleNode *visit(const luci::CircleLogicalAnd *) final;
+ luci::CircleNode *visit(const luci::CircleLogicalNot *) final;
+ luci::CircleNode *visit(const luci::CircleLogicalOr *) final;
+ luci::CircleNode *visit(const luci::CircleLogistic *) final;
+ luci::CircleNode *visit(const luci::CircleLogSoftmax *) final;
+ luci::CircleNode *visit(const luci::CircleMatrixDiag *) final;
+ luci::CircleNode *visit(const luci::CircleMatrixSetDiag *) final;
+ luci::CircleNode *visit(const luci::CircleMaximum *) final;
+ luci::CircleNode *visit(const luci::CircleMaxPool2D *) final;
+ luci::CircleNode *visit(const luci::CircleMean *) final;
+ luci::CircleNode *visit(const luci::CircleMinimum *) final;
+ luci::CircleNode *visit(const luci::CircleMirrorPad *) final;
+ luci::CircleNode *visit(const luci::CircleMul *) final;
+ luci::CircleNode *visit(const luci::CircleNeg *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV4 *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV5 *) final;
+ luci::CircleNode *visit(const luci::CircleNotEqual *) final;
+ luci::CircleNode *visit(const luci::CircleOneHot *) final;
+ luci::CircleNode *visit(const luci::CirclePack *) final;
+ luci::CircleNode *visit(const luci::CirclePad *) final;
+ luci::CircleNode *visit(const luci::CirclePadV2 *) final;
+ luci::CircleNode *visit(const luci::CirclePow *) final;
+ luci::CircleNode *visit(const luci::CirclePRelu *) final;
+ luci::CircleNode *visit(const luci::CircleRange *) final;
+ luci::CircleNode *visit(const luci::CircleRank *) final;
+ luci::CircleNode *visit(const luci::CircleReduceAny *) final;
+ luci::CircleNode *visit(const luci::CircleReduceMax *) final;
+ luci::CircleNode *visit(const luci::CircleReduceMin *) final;
+ luci::CircleNode *visit(const luci::CircleReduceProd *) final;
+ luci::CircleNode *visit(const luci::CircleRelu *) final;
+ luci::CircleNode *visit(const luci::CircleRelu6 *) final;
+ luci::CircleNode *visit(const luci::CircleReluN1To1 *) final;
+ luci::CircleNode *visit(const luci::CircleReshape *) final;
+ luci::CircleNode *visit(const luci::CircleResizeBilinear *) final;
+ luci::CircleNode *visit(const luci::CircleResizeNearestNeighbor *) final;
+ luci::CircleNode *visit(const luci::CircleReverseSequence *) final;
+ luci::CircleNode *visit(const luci::CircleReverseV2 *) final;
+ luci::CircleNode *visit(const luci::CircleRound *) final;
+ luci::CircleNode *visit(const luci::CircleRsqrt *) final;
+ luci::CircleNode *visit(const luci::CircleScatterNd *) final;
+ luci::CircleNode *visit(const luci::CircleSegmentSum *) final;
+ luci::CircleNode *visit(const luci::CircleSelect *) final;
+ luci::CircleNode *visit(const luci::CircleSelectV2 *) final;
+ luci::CircleNode *visit(const luci::CircleShape *) final;
+ luci::CircleNode *visit(const luci::CircleSin *) final;
+ luci::CircleNode *visit(const luci::CircleSlice *) final;
+ luci::CircleNode *visit(const luci::CircleSoftmax *) final;
+ luci::CircleNode *visit(const luci::CircleSpaceToBatchND *) final;
+ luci::CircleNode *visit(const luci::CircleSpaceToDepth *) final;
+ luci::CircleNode *visit(const luci::CircleSparseToDense *) final;
+ luci::CircleNode *visit(const luci::CircleSplit *) final;
+ luci::CircleNode *visit(const luci::CircleSplitV *) final;
+ luci::CircleNode *visit(const luci::CircleSqrt *) final;
+ luci::CircleNode *visit(const luci::CircleSquare *) final;
+ luci::CircleNode *visit(const luci::CircleSquaredDifference *) final;
+ luci::CircleNode *visit(const luci::CircleSqueeze *) final;
+ luci::CircleNode *visit(const luci::CircleStridedSlice *) final;
+ luci::CircleNode *visit(const luci::CircleSub *) final;
+ luci::CircleNode *visit(const luci::CircleSum *) final;
+ luci::CircleNode *visit(const luci::CircleTanh *) final;
+ luci::CircleNode *visit(const luci::CircleTile *) final;
+ luci::CircleNode *visit(const luci::CircleTopKV2 *) final;
+ luci::CircleNode *visit(const luci::CircleTranspose *) final;
+ luci::CircleNode *visit(const luci::CircleTransposeConv *) final;
+ luci::CircleNode *visit(const luci::CircleUnidirectionalSequenceLSTM *) final;
+ luci::CircleNode *visit(const luci::CircleUnique *) final;
+ luci::CircleNode *visit(const luci::CircleUnpack *) final;
+ luci::CircleNode *visit(const luci::CircleWhere *) final;
+ // luci::CircleNode *visit(const luci::CircleWhile *) final;
+ luci::CircleNode *visit(const luci::CircleZerosLike *) final;
+
+ // Circle Only
+ luci::CircleNode *visit(const luci::CircleBCQFullyConnected *) final;
+ luci::CircleNode *visit(const luci::CircleBCQGather *) final;
+ luci::CircleNode *visit(const luci::CircleInstanceNorm *) final;
+
+ // Virtual
+ luci::CircleNode *visit(const luci::CircleCustomOut *) final;
+ // luci::CircleNode *visit(const luci::CircleIfOut *) final;
+ // luci::CircleNode *visit(const luci::CircleInput *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV4Out *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV5Out *) final;
+ // luci::CircleNode *visit(const luci::CircleOutput *) final;
+ luci::CircleNode *visit(const luci::CircleOutputDummy *) final;
+ luci::CircleNode *visit(const luci::CircleOutputExclude *) final;
+ luci::CircleNode *visit(const luci::CircleSplitOut *) final;
+ luci::CircleNode *visit(const luci::CircleSplitVOut *) final;
+ luci::CircleNode *visit(const luci::CircleTopKV2Out *) final;
+ luci::CircleNode *visit(const luci::CircleUniqueOut *) final;
+ luci::CircleNode *visit(const luci::CircleUnpackOut *) final;
+ // luci::CircleNode *visit(const luci::CircleWhileOut *) final;
+
+ // NOTE CircleNodeVisitor will throw if not supported here
+
+protected:
+ loco::Graph *_graph = nullptr;
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_CLONE_NODE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include "CircleCloneNode.h"
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <memory>
+
+namespace luci
+{
+
+/**
+ * @note Node-type-specific attributes, such as keep_dims() of CircleSum,
+ *       are not copied.
+ */
+void copy_common_attributes(const luci::CircleNode *src, luci::CircleNode *dst)
+{
+ assert(src != nullptr);
+ assert(dst != nullptr);
+
+ dst->name(src->name());
+ dst->dtype(src->dtype());
+
+ dst->rank(src->rank());
+ for (uint32_t i = 0; i < src->rank(); i++)
+ {
+ dst->dim(i) = src->dim(i);
+ }
+ dst->shape_status(src->shape_status());
+
+ // quantparam
+ const auto *quantparam = src->quantparam();
+ if (quantparam != nullptr)
+ {
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = quantparam->scale;
+ qparam->zerop = quantparam->zerop;
+ qparam->min = quantparam->min;
+ qparam->max = quantparam->max;
+ qparam->quantized_dimension = quantparam->quantized_dimension;
+
+ dst->quantparam(std::move(qparam));
+ }
+
+ // sparsity
+ const auto *sparsity = src->sparsityparam();
+ if (sparsity != nullptr)
+ {
+ auto sparam = std::make_unique<luci::SparsityParam>();
+ sparam->traversal_order = sparsity->traversal_order;
+ sparam->block_map = sparsity->block_map;
+ sparam->dim_metadata = sparsity->dim_metadata;
+
+ dst->sparsityparam(std::move(sparam));
+ }
+
+ // op version
+ dst->op_version(src->op_version());
+}
+
+/**
+ * @note Each visit implementation must copy the node-specific attributes
+ *       (see the usage sketch after clone_node below).
+ */
+luci::CircleNode *clone_node(const luci::CircleNode *node, loco::Graph *graph)
+{
+ if (node == nullptr || graph == nullptr)
+ return nullptr;
+
+ CloneNode cn(graph);
+ auto cloned = node->accept(&cn);
+ if (cloned != nullptr)
+ copy_common_attributes(node, cloned);
+ return cloned;
+}
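+
+// Illustrative sketch of what a single CloneNode::visit() is expected to look
+// like; this is not code from this change, only an example of the note above
+// (the node-specific attribute here is the fused activation of ADD):
+//
+//   luci::CircleNode *CloneNode::visit(const luci::CircleAdd *node)
+//   {
+//     auto *cloned = _graph->nodes()->create<luci::CircleAdd>();
+//     cloned->fusedActivationFunction(node->fusedActivationFunction());
+//     return cloned;
+//   }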
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+// NOTE any node will do for testing
+#include <luci/IR/Nodes/CircleAdd.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+luci::CircleAdd *build_simple_add_graph(loco::Graph *g)
+{
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ node->name("name");
+ node->dtype(loco::DataType::FLOAT32);
+ node->rank(1);
+ node->dim(0).set(3);
+ node->shape_status(luci::ShapeStatus::VALID);
+ node->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = {1.0};
+ qparam->zerop = {0};
+ qparam->min = {0.0};
+ qparam->max = {1.0};
+ qparam->quantized_dimension = 0;
+ node->quantparam(std::move(qparam));
+
+ auto sparam = std::make_unique<luci::SparsityParam>();
+ sparam->traversal_order = {0};
+ sparam->block_map = {0};
+ sparam->dim_metadata = {luci::DimMetaData(luci::DimensionType::DENSE, 1)};
+ node->sparsityparam(std::move(sparam));
+
+ node->op_version(2);
+
+ return node;
+}
+
+} // namespace
+
+TEST(CircleNodeCloneTest, copy_attributes)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto copy = g->nodes()->create<luci::CircleAdd>();
+ luci::copy_common_attributes(node, copy);
+
+ ASSERT_EQ(node->name(), copy->name());
+ ASSERT_EQ(node->dtype(), copy->dtype());
+ ASSERT_EQ(node->rank(), copy->rank());
+ ASSERT_EQ(node->shape_status(), copy->shape_status());
+
+ const auto *qparam_node = node->quantparam();
+ const auto *qparam_copy = copy->quantparam();
+ ASSERT_EQ(qparam_node->scale, qparam_copy->scale);
+
+ const auto *sparsity_node = node->sparsityparam();
+ const auto *sparsity_copy = copy->sparsityparam();
+ ASSERT_EQ(sparsity_node->traversal_order, sparsity_copy->traversal_order);
+
+ ASSERT_EQ(node->op_version(), copy->op_version());
+}
+
+TEST(CircleNodeCloneTest, clone_add_node)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto cg = loco::make_graph();
+ auto clone = clone_node(node, cg.get());
+
+ ASSERT_NE(nullptr, clone);
+ ASSERT_EQ(cg.get(), clone->graph());
+ ASSERT_EQ(node->name(), clone->name());
+ ASSERT_EQ(node->dtype(), clone->dtype());
+ ASSERT_EQ(node->rank(), clone->rank());
+ ASSERT_EQ(node->shape_status(), clone->shape_status());
+}
+
+TEST(CircleNodeCloneTest, clone_node_NEG)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto cg = loco::make_graph();
+ auto clone = luci::clone_node(nullptr, cg.get());
+ ASSERT_EQ(nullptr, clone);
+ auto clone2 = luci::clone_node(node, nullptr);
+ ASSERT_EQ(nullptr, clone2);
+}
*/
#include "luci/Service/CircleShapeInference.h"
-#include "luci/Service/ShapeDescription.h"
+
+#include "CircleShapeInferenceHelper.h"
#include <loco.h>
-#include <loco/Service/ShapeInference.h>
#include <luci/Log.h>
#include <cassert>
#include <iostream>
-namespace luci
-{
-
-ShapeDescription ShapeInference::get(loco::Node *node)
-{
- assert(loco::shape_known(node));
- return to_shape_description(loco::shape_get(node));
-}
-
-} // namespace luci
-
namespace
{
{
if (r)
os << ",";
- os << tensor_shape.dim(r).value();
+
+ if (tensor_shape.dim(r).known())
+ os << tensor_shape.dim(r).value();
+ else
+ os << "?";
}
os << "]";
return os;
return true;
}
-} // namespace ssinf
+} // namespace sinf
} // namespace luci
* limitations under the License.
*/
-#include "luci/Service/CircleShapeInferenceHelper.h"
+#include "CircleShapeInferenceHelper.h"
+
+namespace luci
+{
+
+loco::NodeShape shape_get(const loco::Node *node)
+{
+ assert(luci::shape_known(node));
+ return loco::NodeShape{sinf::circle_shape(loco::must_cast<const luci::CircleNode *>(node))};
+}
+
+bool shape_known(const loco::Node *node)
+{
+ return loco::must_cast<const luci::CircleNode *>(node)->shape_status() !=
+ luci::ShapeStatus::UNDEFINED;
+}
+
+} // namespace luci
namespace luci
{
loco::TensorShape shape;
shape.rank(node->rank());
for (uint32_t r = 0; r < node->rank(); ++r)
- shape.dim(r) = loco::Dimension(node->dim(r).value());
+ shape.dim(r) = node->dim(r);
return shape;
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
+#define __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
+
+#include <loco/IR/NodeShape.h>
+#include <loco/IR/TensorShape.h>
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+// NOTE Functions in this namespace will be removed after new inference
+// algorithms are fully implemented.
+
+// Temporary helper for deprecating loco::shape_get
+loco::NodeShape shape_get(const loco::Node *node);
+
+// Temporary helper for deprecating loco::shape_known
+bool shape_known(const loco::Node *node);
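+
+// Typical use inside the shape inference code in this directory, e.g.:
+//
+//   auto x_shape = luci::shape_get(node->x()).as<loco::TensorShape>();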
+
+} // namespace luci
+
+namespace luci
+{
+namespace sinf // Namespace for Shape Inference
+{
+
+// Return shape of circle node as loco::TensorShape
+loco::TensorShape circle_shape(const luci::CircleNode *node);
+
+} // namespace sinf
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
#include "luci/Service/CircleShapeInferenceRule.h"
#include "Check.h"
+#include "CircleShapeInferenceHelper.h"
#include "ShapeInfer_StridedSlice.h"
#include <luci/IR/CircleNodes.h>
{
if (r)
os << ",";
- os << tensor_shape.dim(r).value();
+
+ if (tensor_shape.dim(r).known())
+ os << tensor_shape.dim(r).value();
+ else
+ os << "?";
}
os << "]";
return os;
loco::TensorShape shape;
shape.rank(node->rank());
for (uint32_t r = 0; r < node->rank(); ++r)
- shape.dim(r) = loco::Dimension(node->dim(r).value());
+ {
+    // The shape inference rules in this file do not handle unknown dimensions.
+    // If a node has an unknown dimension, 0 used to be inserted and the
+    // inferred shape became wrong as a result.
+    // A new shape inference algorithm is being implemented to fix this.
+    // Until it is fully implemented, an unknown dimension is represented
+    // as 1, following the TFLite convention.
+ shape.dim(r) = node->dim(r).known() ? node->dim(r).value() : 1;
+ }
return shape;
}
output_shape.rank(rank);
for (uint32_t axis = 0; axis < rank; ++axis)
{
- assert(x.dim(axis).known() && y.dim(axis).known());
-
- auto x_dim = x.dim(axis).value();
- auto y_dim = y.dim(axis).value();
+ auto x_dim = x.dim(axis).known() ? x.dim(axis).value() : 1;
+ auto y_dim = y.dim(axis).known() ? y.dim(axis).value() : 1;
// each dimension of x and y should be same or one must be 1 if different
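    // e.g. broadcasting x = [2, 1, 5] with y = [2, 3, 1] yields [2, 3, 5]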
if (!((x_dim == y_dim) || (x_dim == 1 || y_dim == 1)))
template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
{
- auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
- auto y_shape = loco::shape_get(node->y()).template as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->x()).template as<loco::TensorShape>();
+ auto y_shape = luci::shape_get(node->y()).template as<loco::TensorShape>();
auto output_shape = broadcast_shape(x_shape, y_shape);
return loco::NodeShape{output_shape};
}
+template <class CIRCLENODE> loco::NodeShape use_inputs(const CIRCLENODE *node)
+{
+ auto inputs_shape = luci::shape_get(node->inputs()).template as<loco::TensorShape>();
+ return loco::NodeShape{inputs_shape};
+}
+
template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
{
- auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->x()).template as<loco::TensorShape>();
return loco::NodeShape{x_shape};
}
template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
{
- auto shape = loco::shape_get(node->logits()).template as<loco::TensorShape>();
+ auto shape = luci::shape_get(node->logits()).template as<loco::TensorShape>();
return loco::NodeShape{shape};
}
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
// TODO support other data type
LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
loco::NodeShape infer_add_n(const luci::CircleAddN *node)
{
- auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
+ auto shape = luci::shape_get(node->inputs(0)).as<loco::TensorShape>();
for (uint32_t idx = 1; idx < node->arity(); ++idx)
{
- auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
+ auto shape_idx = luci::shape_get(node->inputs(idx)).as<loco::TensorShape>();
if (!(shape == shape_idx))
{
INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto dimension_shape = luci::shape_get(node->dimension()).as<loco::TensorShape>();
int64_t select_axis = 0;
{
loco::NodeShape infer_arg_min(const luci::CircleArgMin *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto dimension_shape = luci::shape_get(node->dimension()).as<loco::TensorShape>();
int64_t select_axis = 0;
{
// Call this for CircleAvgPool2D and CircleMaxPool2D only
template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
{
- LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
-
- auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
+ auto ifm_shape = luci::shape_get(node->value()).template as<loco::TensorShape>();
assert(ifm_shape.rank() == 4);
uint32_t input_height = ifm_shape.dim(1).value();
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
// Support only input rank is 3 and 4
assert(input_shape.rank() == 3 || input_shape.rank() == 4);
auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
- auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
- auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
+ auto const_block_shape_shape = luci::shape_get(const_block_shape).as<loco::TensorShape>();
+ auto const_crops_shape = luci::shape_get(const_crops).as<loco::TensorShape>();
assert(const_block_shape_shape.rank() == 1);
assert(const_crops_shape.rank() == 2);
template <class Conv2DType> OutputSize infer_conv2d_type(const Conv2DType *node)
{
- auto ifm_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
- auto ker_shape = loco::shape_get(node->filter()).template as<loco::TensorShape>();
+ auto ifm_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
+ auto ker_shape = luci::shape_get(node->filter()).template as<loco::TensorShape>();
assert(ifm_shape.rank() == 4);
assert(ker_shape.rank() == 4);
loco::Dimension y_lhs = adj_y ? y_shape.dim(y_rank - 1) : y_shape.dim(y_rank - 2);
loco::Dimension y_rhs = adj_y ? y_shape.dim(y_rank - 2) : y_shape.dim(y_rank - 1);
- if (not(x_rhs == y_lhs))
+ if (x_rhs.known() && y_lhs.known() && not(x_rhs == y_lhs))
INTERNAL_EXN("x_rhs and y_lhs should be same");
uint32_t out_rank = output_shape.rank();
// TODO Support when CircleConcatenation has 0 input
assert(node->numValues() > 0);
- auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ auto first_shape = luci::shape_get(node->values(0)).as<loco::TensorShape>();
auto axis = node->axis();
if (axis < 0)
axis += first_shape.rank();
for (uint32_t i = 1; i < node->numValues(); ++i)
{
- auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->values(i)).as<loco::TensorShape>();
for (uint32_t j = 0; j < output_shape.rank(); ++j)
{
if (j == static_cast<uint32_t>(axis))
+ {
+        // If the dimension is unknown, value() returns 0, which is wrong.
+        // To keep compatibility, this code is left as is until the new
+        // inference algorithm is implemented.
output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
+ }
else
- assert(output_shape.dim(j) == input_shape.dim(j));
+ assert(!output_shape.dim(j).known() || !input_shape.dim(j).known() ||
+ output_shape.dim(j) == input_shape.dim(j));
}
}
{
LOGGER(l);
- auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
- auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+ auto ifm_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = luci::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker(" << ker_shape.rank()
<< ")" << std::endl;
loco::NodeShape infer_depth_to_space(const luci::CircleDepthToSpace *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
// Only data format NHWC is supported
loco::NodeShape infer_depthwise_conv2d(const luci::CircleDepthwiseConv2D *node)
{
- auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
- auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+ auto ifm_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = luci::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
assert(ifm_shape.rank() == 4);
assert(ker_shape.rank() == 4);
assert(ker_shape.dim(0).value() == 1);
+ assert(ifm_shape.dim(3).value() * node->depthMultiplier() == ker_shape.dim(3).value());
auto os = infer_conv2d_type(node);
loco::NodeShape infer_expand_dims(const luci::CircleExpandDims *node)
{
const loco::DataType S32 = loco::DataType::S32;
- auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
if (x_shape.rank() == 0)
{
// This may be for an unknown shape. We use the shape from the node itself.
}
int32_t axis = const_axis->at<S32>(0);
LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
- (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
+ (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
"Axis has to be between [-(D+1), D], where D is rank of input.");
size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
loco::TensorShape output_shape;
loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto weights_shape = luci::shape_get(node->weights()).as<loco::TensorShape>();
// Checking shape capability for fully connected layer
// Input: a tensor of at least rank 2 [D1, D2, ... Dn]
{
loco::TensorShape output_shape;
- const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
- const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->params()).as<loco::TensorShape>();
+ const auto positions_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
int32_t axis = node->axis();
// If CircleGather input has a dynamic shape, it can't infer this shape. So, it returns the
{
loco::TensorShape output_shape;
- const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
- const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ const auto params_shape = luci::shape_get(node->params()).as<loco::TensorShape>();
+ const auto indices_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
const auto params_rank = params_shape.rank();
const auto indices_rank = indices_shape.rank();
{
loco::TensorShape output_shape;
- auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+ auto diagonal_shape = luci::shape_get(node->diagonal()).as<loco::TensorShape>();
auto rank = diagonal_shape.rank();
output_shape.rank(rank + 1);
loco::NodeShape infer_matrix_set_diag(const luci::CircleMatrixSetDiag *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto diagonal_shape = luci::shape_get(node->diagonal()).as<loco::TensorShape>();
auto rank = diagonal_shape.rank();
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(input).as<loco::TensorShape>();
auto reduction_indices = loco::must_cast<const luci::CircleConst *>(indices);
{ // Exceptions
loco::NodeShape infer_one_hot(const luci::CircleOneHot *node)
{
const loco::DataType S32 = loco::DataType::S32;
- auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ auto indices_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
// Only support the case where OneHot node's depth() is CircleConst with type S32
// TODO support depth with other types
auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
{
LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
- auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ auto first_shape = luci::shape_get(node->values(0)).as<loco::TensorShape>();
// Make sure all inputs have the same shape.
for (uint32_t i = 1; i < node->values_count(); ++i)
{
- auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ auto in_shape = luci::shape_get(node->values(i)).as<loco::TensorShape>();
LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
"All inputs must have the same shape");
}
loco::NodeShape infer_p_relu(const luci::CirclePRelu *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto alpha_shape = luci::shape_get(node->alpha()).as<loco::TensorShape>();
auto output_shape = broadcast_shape(input_shape, alpha_shape);
loco::TensorShape output_shape = shape_by_input;
// One of the dimensions can have special value -1, meaning its actual value should be inferred.
- const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
- const uint32_t input_element_count = loco::element_count(&input_shape);
+ const auto input_shape = luci::shape_get(node->tensor()).as<loco::TensorShape>();
+ uint32_t input_element_count = 1;
uint32_t output_element_count = 1;
uint32_t unknown_dim_index = UINT32_MAX;
+ for (uint32_t i = 0; i < input_shape.rank(); ++i)
+ input_element_count *= (input_shape.dim(i).known() ? input_shape.dim(i).value() : 1);
for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
{
const uint32_t dim_value = output_shape.dim(dim_index).value();
loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
if (input_shape.rank() != 4)
INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
if (input_shape.rank() != 4)
INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
loco::NodeShape infer_segment_sum(const luci::CircleSegmentSum *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto segment_shape = luci::shape_get(node->segment_ids()).as<loco::TensorShape>();
LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
loco::NodeShape infer_select(const luci::CircleSelect *node)
{
- auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
- assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
+ auto t_shape = luci::shape_get(node->t()).as<loco::TensorShape>();
+ assert(t_shape == luci::shape_get(node->e()).as<loco::TensorShape>());
// condition shape validation
- auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+ auto c_shape = luci::shape_get(node->condition()).as<loco::TensorShape>();
if (c_shape.rank() != t_shape.rank())
{
if (c_shape.rank() != 0 && c_shape.rank() != 1)
loco::NodeShape infer_select_v2(const luci::CircleSelectV2 *node)
{
- auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
- auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
- auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
+ auto c_shape = luci::shape_get(node->condition()).as<loco::TensorShape>();
+ auto t_shape = luci::shape_get(node->t()).as<loco::TensorShape>();
+ auto e_shape = luci::shape_get(node->e()).as<loco::TensorShape>();
// validate ability to broadcast shapes to each other
auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
loco::NodeShape infer_shape(const luci::CircleShape *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
loco::TensorShape output_shape;
const loco::DataType S32 = loco::DataType::S32;
const loco::DataType S64 = loco::DataType::S64;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
// Only input rank 3 or 4 is supported
assert(input_shape.rank() == 3 || input_shape.rank() == 4);
auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
- auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
- auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
+ auto const_block_shape_shape = luci::shape_get(const_block_shape).as<loco::TensorShape>();
+ auto const_paddings_shape = luci::shape_get(const_paddings).as<loco::TensorShape>();
assert(const_block_shape_shape.rank() == 1);
assert(const_paddings_shape.rank() == 2);
loco::NodeShape infer_space_to_depth(const luci::CircleSpaceToDepth *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
// Only data format NHWC is supported
auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
if (output_shape_node != nullptr)
{
- // Only support node with S32
- LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst");
+ const auto output_shape_type = output_shape_node->dtype();
if (output_shape_node->rank() != 1)
INTERNAL_EXN_V("Only support rank 1 CircleConst",
oops::to_uint32(output_shape_node->rank()));
- shape.rank(output_shape_node->size<loco::DataType::S32>());
+ if (output_shape_type == loco::DataType::S32)
+ {
+ shape.rank(output_shape_node->size<loco::DataType::S32>());
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
+ }
+ }
+ else if (output_shape_type == loco::DataType::S64)
{
- shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
+ shape.rank(output_shape_node->size<loco::DataType::S64>());
+
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ shape.dim(axis) = output_shape_node->at<loco::DataType::S64>(axis);
+ }
+ }
+ else
+ {
+ INTERNAL_EXN("Output shape of SparseToDense must be either int32 or int64");
}
}
else
loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
// TODO input shape may be unknown before runtime
std::vector<bool> do_squeeze(input_shape.rank(), false);
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
// TODO support non-const case
loco::NodeShape infer_transpose(const luci::CircleTranspose *node)
{
- auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->a()).as<loco::TensorShape>();
auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
// CircleUnpack provides a list (array) of Tensors, each having one less dimension than the input
// We'll set the shape of CircleUnpack to the shape of the actual outputs
// TODO fix this if any problem arises
- auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
+ auto value_shape = luci::shape_get(node->value()).as<loco::TensorShape>();
auto axis = node->axis();
auto num = node->num();
loco::NodeShape infer_unidirectionalsequencelstm(const luci::CircleUnidirectionalSequenceLSTM *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto recurrent_to_output_weights =
- loco::shape_get(node->recurrent_to_output_weights()).as<loco::TensorShape>();
+ luci::shape_get(node->recurrent_to_output_weights()).as<loco::TensorShape>();
auto rank = input_shape.rank();
loco::TensorShape output_shape;
output_shape.rank(rank);
loco::NodeShape infer_unique(const luci::CircleUnique *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
assert(input_shape.rank() == 1);
{
loco::TensorShape out_shape;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
loco::TensorShape input_shape;
loco::TensorShape output_shape;
- const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
- const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ const auto input_binary_shape = luci::shape_get(node->input_binary()).as<loco::TensorShape>();
+ const auto indices_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
auto axis = node->axis();
auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
return loco::NodeShape{*output_shape};
}
-loco::NodeShape infer_if_out(const luci::CircleIfOut *node)
-{
- /**
- * @note IF operator type and shape are that of the "then" and "else"
- * Graph Outputs.
- */
- auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
- if (circle_if == nullptr)
- {
- INTERNAL_EXN("CircleIf IR is not configured correctly");
- }
-
- auto index = node->index();
- auto then_graph = circle_if->then_graph();
- auto else_graph = circle_if->else_graph();
- assert(then_graph != nullptr);
- assert(else_graph != nullptr);
-
- // shape and type are assumed to be same
- // these are checked at post_import_graph() in Import
- auto then_outputs = loco::output_nodes(then_graph);
- auto else_outputs = loco::output_nodes(else_graph);
- assert(then_outputs.size() == else_outputs.size());
- assert(index < static_cast<int32_t>(then_outputs.size()));
-
- auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
- auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
-
- auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
- auto else_graph_outputs = else_graph->outputs();
- assert(then_graph_outputs->size() == else_graph_outputs->size());
-
- auto then_graph_output = then_graph_outputs->at(then_out->index());
- auto else_graph_output = else_graph_outputs->at(else_out->index());
- (void)else_graph_output; // make compiler happy for unused variable warnings
- assert(*then_graph_output->shape() == *else_graph_output->shape());
-
- return loco::NodeShape{*then_graph_output->shape()};
-}
-
loco::NodeShape infer_non_max_suppression_v4_out(const luci::CircleNonMaxSuppressionV4Out *node)
{
const loco::DataType S32 = loco::DataType::S32;
loco::NodeShape unknown;
- auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
+ auto split_shape = luci::shape_get(split).as<loco::TensorShape>();
auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
if (split_dim == nullptr)
loco::NodeShape unknown;
- auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
+ auto split_shape = luci::shape_get(split).as<loco::TensorShape>();
auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
if (size_splits == nullptr)
INTERNAL_EXN("CircleSplit IR is not configured correctly");
// shape of topkv2 is same as topkv2->input()
- auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(topkv2).as<loco::TensorShape>();
auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
}
assert(node->index() == 1);
auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
- auto unique_shape = loco::shape_get(unique->input()).as<loco::TensorShape>();
+ auto unique_shape = luci::shape_get(unique->input()).as<loco::TensorShape>();
assert(unique_shape.rank() == 1);
INTERNAL_EXN("CircleUnpack IR is not configured correctly");
}
- auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
+ auto unpack_shape = luci::shape_get(unpack).as<loco::TensorShape>();
return loco::NodeShape{unpack_shape};
}
loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
{
- auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
- auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = luci::shape_get(node->y()).as<loco::TensorShape>();
return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
}
loco::NodeShape visit(const luci::CircleDequantize *node) final
{
- const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleElu *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
return infer_expand_dims(node);
}
+ loco::NodeShape visit(const luci::CircleFakeQuant *node) final { return use_inputs(node); }
+
loco::NodeShape visit(const luci::CircleFill *node) final { return infer_fill(node); }
loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
{
// Shape of CircleIf is not used. Just use input 0
assert(node->input_count() > 0);
- const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input(0)).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
{
- const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
{
- const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
{
- const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ const auto boxes_shape = luci::shape_get(node->boxes()).as<loco::TensorShape>();
return loco::NodeShape{boxes_shape};
}
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5 *node) final
{
- const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ const auto boxes_shape = luci::shape_get(node->boxes()).as<loco::TensorShape>();
return loco::NodeShape{boxes_shape};
}
loco::NodeShape visit(const luci::CircleRelu *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleRelu6 *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleReluN1To1 *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleReverseSequence *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleReverseV2 *node) final
{
- auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->tensor()).as<loco::TensorShape>();
- LUCI_ASSERT(loco::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
+ LUCI_ASSERT(luci::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
"Tensor must be 1-D");
return loco::NodeShape{input_shape};
loco::NodeShape visit(const luci::CircleSplit *node) final
{
// We'll set Split output the same as input so that SplitOut can handle its own shape
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleSplitV *node) final
{
// We'll set SplitV output the same as input so that SplitOut can handle its own shape
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleTopKV2 *node) final
{
// Set the shape of this node to be the same as its input
- const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
{
// Shape of CircleWhile is not used. Just use input 0
assert(node->arity() > 0);
- const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input(0)).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleZerosLike *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleInstanceNorm *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleCustomOut *node) final { return use_own(node); }
- loco::NodeShape visit(const luci::CircleIfOut *node) final { return infer_if_out(node); }
-
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final
{
return infer_non_max_suppression_v4_out(node);
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TestGraph.h"
-#include "luci/Service/CircleShapeInferenceRule.h"
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleDialect.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/CanonicalShapeInferenceRule.h>
-#include <loco/Service/MultiDialectShapeInferenceRule.h>
-
-#include <oops/InternalExn.h>
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-namespace
-{
-
-bool shape_pass(loco::Graph *g)
-{
- loco::CanonicalShapeInferenceRule canonical_rule;
- luci::CircleShapeInferenceRule circle_rule;
- loco::MultiDialectShapeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(luci::CircleDialect::get(), &circle_rule);
-
- return loco::apply(&rules).to(g);
-}
-
-} // namespace
-
-TEST(CircleShapeInferenceRuleTest, minimal_with_CircleRelu)
-{
- // Create a simple network
- luci::test::TestGraph graph;
- auto relu_node = graph.append<luci::CircleRelu>(graph.input_node);
- graph.complete(relu_node);
-
- // set shape
- {
- graph.input_node->rank(2);
- graph.input_node->dim(0) = 3;
- graph.input_node->dim(1) = 4;
-
- graph.output_node->rank(2);
- graph.output_node->dim(0) = 3;
- graph.output_node->dim(1) = 4;
-
- luci::test::graph_input_shape(graph.input_node);
- luci::test::graph_output_shape(graph.output_node);
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(relu_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(relu_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(relu_node).domain());
-
- auto shape = loco::shape_get(relu_node).as<loco::TensorShape>();
- ASSERT_EQ(2, shape.rank());
- ASSERT_EQ(3, shape.dim(0));
- ASSERT_EQ(4, shape.dim(1));
- }
-}
-
-// based on the case shown in
-// https://www.corvil.com/kb/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-tensorflow
-TEST(CircleShapeInferenceRuleTest, avgpool2d_valid)
-{
- luci::test::TestGraph graph;
- auto avg_node = graph.append<luci::CircleAveragePool2D>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- luci::test::graph_input_shape(input_node);
- }
- auto output_node = graph.output_node;
- {
- output_node->shape({1, 2, 1, 1});
- luci::test::graph_output_shape(output_node);
- }
- // setting CircleAveragePool2D
- {
- avg_node->filter()->h(2);
- avg_node->filter()->w(2);
- avg_node->stride()->h(2);
- avg_node->stride()->w(2);
- avg_node->fusedActivationFunction(luci::FusedActFunc::NONE);
- avg_node->padding(luci::Padding::VALID);
- }
- ASSERT_FALSE(loco::shape_known(avg_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(avg_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(avg_node).domain());
-
- auto shape = loco::shape_get(avg_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0).value());
- ASSERT_EQ(2, shape.dim(1).value());
- ASSERT_EQ(1, shape.dim(2).value());
- ASSERT_EQ(1, shape.dim(3).value());
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, avgpool2d_same)
-{
- luci::test::TestGraph graph;
- auto avg_node = graph.append<luci::CircleAveragePool2D>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- luci::test::graph_input_shape(input_node);
- }
- auto output_node = graph.output_node;
- {
- output_node->shape({1, 2, 2, 1});
- luci::test::graph_output_shape(output_node);
- }
-
- // setting CircleAveragePool2D
- {
- avg_node->filter()->h(2);
- avg_node->filter()->w(2);
- avg_node->stride()->h(2);
- avg_node->stride()->w(2);
- avg_node->fusedActivationFunction(luci::FusedActFunc::NONE);
- avg_node->padding(luci::Padding::SAME);
- }
-
- ASSERT_FALSE(loco::shape_known(avg_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(avg_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(avg_node).domain());
-
- auto shape = loco::shape_get(avg_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0).value());
- ASSERT_EQ(2, shape.dim(1).value());
- ASSERT_EQ(2, shape.dim(2).value());
- ASSERT_EQ(1, shape.dim(3).value());
- }
-}
-
-/**
- * @note Function to test: Shape inference of two different input shapes
- *
- * Rank expansion to higher input side
- * x(2,1,5) + y(3,5) --> x(2,1,5) + y(1,3,5)
- * Do output shape inference like numpy
- * x(2,1,5) + y(1,3,5) --> output(2,3,5)
- * For each axis, dim value should be same OR one of them should be 1
- */
-TEST(CircleShapeInferenceRuleTest, TFAdd_shapeinf_different)
-{
- auto g = loco::make_graph();
-
- auto x_node = g->nodes()->create<luci::CircleInput>();
- {
- x_node->rank(3);
- x_node->dim(0) = 2;
- x_node->dim(1) = 1;
- x_node->dim(2) = 5;
- }
- auto y_node = g->nodes()->create<luci::CircleInput>();
- {
- y_node->rank(2);
- y_node->dim(0) = 3;
- y_node->dim(1) = 5;
- }
- auto add_node = g->nodes()->create<luci::CircleAdd>();
- {
- add_node->x(x_node);
- add_node->y(y_node);
- }
- auto output_node = g->nodes()->create<luci::CircleOutput>();
- {
- output_node->from(add_node);
- }
-
- auto x_input = g->inputs()->create();
- {
- x_input->name("x");
- luci::link(x_input, x_node);
- }
- auto y_input = g->inputs()->create();
- {
- y_input->name("y");
- luci::link(y_input, y_node);
- }
- auto output = g->outputs()->create();
- {
- output->name("output");
- luci::link(output, output_node);
- }
-
- luci::test::graph_input_shape(x_node);
- luci::test::graph_input_shape(y_node);
- luci::test::graph_output_shape(output_node);
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(add_node));
-
- // shape inference
- while (shape_pass(g.get()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(add_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(add_node).domain());
-
- auto shape = loco::shape_get(add_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(2, shape.dim(0));
- ASSERT_EQ(3, shape.dim(1));
- ASSERT_EQ(5, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleTranspose_simple)
-{
- luci::test::ExampleGraph<luci::test::ExampleGraphType::CircleTranspose> g;
-
- g.input_node->rank(3);
- g.input_node->dim(0) = 3;
- g.input_node->dim(1) = 8;
- g.input_node->dim(2) = 1;
-
- g.const_perm->dtype(loco::DataType::S32);
- g.const_perm->rank(1);
- g.const_perm->dim(0) = 3;
- g.const_perm->size<loco::DataType::S32>(3);
- g.const_perm->at<loco::DataType::S32>(0) = 1;
- g.const_perm->at<loco::DataType::S32>(1) = 2;
- g.const_perm->at<loco::DataType::S32>(2) = 0;
-
- luci::test::graph_input_shape(g.input_node);
- luci::test::graph_output_shape(g.output_node);
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(g.transpose_node));
-
- // shape inference
- while (shape_pass(g.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(g.transpose_node));
-
- auto shape = loco::shape_get(g.transpose_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(8, shape.dim(0));
- ASSERT_EQ(1, shape.dim(1));
- ASSERT_EQ(3, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleSqueeze)
-{
- luci::test::TestGraph graph;
- auto squeeze_node = graph.append<luci::CircleSqueeze>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- }
- auto output_node = graph.output_node;
- {
- output_node->shape({4, 3, 1});
- }
-
- luci::test::graph_input_shape(input_node);
- luci::test::graph_output_shape(output_node);
-
- squeeze_node->squeeze_dims({0});
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(squeeze_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(squeeze_node));
-
- auto shape = loco::shape_get(squeeze_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(4, shape.dim(0));
- ASSERT_EQ(3, shape.dim(1));
- ASSERT_EQ(1, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleExpandDims)
-{
- luci::test::TestGraph graph;
- auto axis = graph.append<luci::CircleConst>();
- axis->dtype(loco::DataType::S32);
- axis->rank(0);
- axis->size<loco::DataType::S32>(1);
- axis->at<loco::DataType::S32>(0) = 1;
-
- auto expand_dims = graph.append<luci::CircleExpandDims>(graph.input_node, axis);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({4, 3});
- }
-
- auto output_node = graph.output_node;
- {
- output_node->from(expand_dims);
- }
-
- luci::test::graph_input_shape(input_node);
- luci::test::graph_output_shape(output_node);
-
- // shape inference
- while (shape_pass(graph.graph()))
- ;
-
- // validation
- {
- ASSERT_TRUE(loco::shape_known(expand_dims));
-
- auto shape = loco::shape_get(expand_dims).as<loco::TensorShape>();
-
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(4, shape.dim(0));
- ASSERT_EQ(1, shape.dim(1));
- ASSERT_EQ(3, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleSqueezeAll)
-{
- luci::test::TestGraph graph;
- auto squeeze_node = graph.append<luci::CircleSqueeze>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- }
- auto output_node = graph.output_node;
- {
- input_node->shape({4, 3});
- }
-
- luci::test::graph_input_shape(input_node);
- luci::test::graph_output_shape(output_node);
-
- squeeze_node->squeeze_dims({});
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(squeeze_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(squeeze_node));
-
- auto shape = loco::shape_get(squeeze_node).as<loco::TensorShape>();
- ASSERT_EQ(2, shape.rank());
- ASSERT_EQ(4, shape.dim(0));
- ASSERT_EQ(3, shape.dim(1));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleGatherNd_simple)
-{
- luci::test::TestGraph graph;
- auto indices_const = graph.append<luci::CircleConst>();
- auto gather_nd_node = graph.append<luci::CircleGatherNd>(graph.input_node, indices_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->shape({1, 2, 2, 3});
- luci::test::graph_output_shape(output_node);
- }
-
- {
- indices_const->shape({1, 2, 3});
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(gather_nd_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(gather_nd_node));
-
- auto shape = loco::shape_get(gather_nd_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(2, shape.dim(1));
- ASSERT_EQ(3, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleGatherNd_slices)
-{
- luci::test::TestGraph graph;
- auto indices_const = graph.append<luci::CircleConst>();
- auto gather_nd_node = graph.append<luci::CircleGatherNd>(graph.input_node, indices_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->shape({1, 2, 4, 4, 3});
- luci::test::graph_output_shape(output_node);
- }
-
- {
- indices_const->shape({1, 2, 1});
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(gather_nd_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(gather_nd_node));
-
- auto shape = loco::shape_get(gather_nd_node).as<loco::TensorShape>();
- ASSERT_EQ(5, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(2, shape.dim(1));
- ASSERT_EQ(4, shape.dim(2));
- ASSERT_EQ(4, shape.dim(3));
- ASSERT_EQ(3, shape.dim(4));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleGatherNd_NEG)
-{
- luci::test::TestGraph graph;
- auto indices_const = graph.append<luci::CircleConst>();
- auto gather_nd_node = graph.append<luci::CircleGatherNd>(graph.input_node, indices_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- // Does not matter, because test should fail anyway
- auto output_node = graph.output_node;
- output_node->shape({0, 0, 0});
- luci::test::graph_output_shape(output_node);
- }
-
- {
- indices_const->shape({1, 2, 5});
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(gather_nd_node));
-
- // had to pack into lambda to check throw
- auto lambda = [&]() {
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
- };
-
- ASSERT_THROW(lambda(), oops::InternalExn);
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleResizeNearestNeighbor)
-{
- luci::test::TestGraph graph;
- auto size_const = graph.append<luci::CircleConst>();
- size_const->dtype(loco::DataType::S32);
- size_const->rank(1);
- size_const->dim(0) = 2;
- size_const->size<loco::DataType::S32>(2);
- size_const->at<loco::DataType::S32>(0) = 16;
- size_const->at<loco::DataType::S32>(1) = 16;
- auto resize_node = graph.append<luci::CircleResizeNearestNeighbor>(graph.input_node, size_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->from(resize_node);
- luci::test::graph_output_shape(output_node);
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(resize_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(resize_node));
-
- auto shape = loco::shape_get(resize_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(16, shape.dim(1));
- ASSERT_EQ(16, shape.dim(2));
- ASSERT_EQ(3, shape.dim(3));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleResizeBilinear)
-{
- luci::test::TestGraph graph;
- auto size_const = graph.append<luci::CircleConst>();
- size_const->dtype(loco::DataType::S32);
- size_const->rank(1);
- size_const->dim(0) = 2;
- size_const->size<loco::DataType::S32>(2);
- size_const->at<loco::DataType::S32>(0) = 16;
- size_const->at<loco::DataType::S32>(1) = 16;
- auto resize_node = graph.append<luci::CircleResizeBilinear>(graph.input_node, size_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->from(resize_node);
- luci::test::graph_output_shape(output_node);
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(resize_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(resize_node));
-
- auto shape = loco::shape_get(resize_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(16, shape.dim(1));
- ASSERT_EQ(16, shape.dim(2));
- ASSERT_EQ(3, shape.dim(3));
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Service/CircleShapeSignatureInference.h"
-
-#include <luci/Log.h>
-
-namespace
-{
-
-std::ostream &operator<<(std::ostream &os, const luci::ShapeSignature &shape_signature)
-{
- os << "[";
- for (uint32_t r = 0; r < shape_signature.rank(); ++r)
- {
- if (r)
- os << ",";
- os << shape_signature.dim(r);
- }
- os << "]";
- return os;
-}
-
-} // namespace
-
-namespace luci
-{
-
-namespace ssinf
-{
-
-bool Rule::infer(const luci::CircleNode *circle_node, ShapeSignature &shape_signature) const
-{
- LOGGER(l);
-
- // There is nothing to check before ShapeSignatureInference.
-
- Algorithm alg;
-
- shape_signature = circle_node->accept(&alg);
-
- VERBOSE(l, 1) << "[luci] Shape Signature( " << circle_node->name() << " )";
- VERBOSE(l, 1) << " before: " << circle_node->shape_signature();
- VERBOSE(l, 1) << " after: " << shape_signature;
-
- return true;
-}
-
-} // namespace ssinf
-
-} // namespace luci
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Service/CircleShapeSignatureInferenceHelper.h"
-
-#include <loco.h>
-
-#include <luci/Log.h>
-
-#include <oops/InternalExn.h>
-
-namespace luci
-{
-
-namespace ssinf
-{
-
-luci::ShapeSignature legalized_signature(const luci::ShapeSignature &signature)
-{
- // If shape signature has at least one -1, it is not static.
- for (uint32_t i = 0; i < signature.rank(); ++i)
- if (signature.dim(i) == -1)
- return signature;
-
- // If all dimensions are static, return empty shape signature.
- return luci::ShapeSignature();
-}
-
-ShapeSignature reduced_signature(const loco::Node *node, const loco::Node *indices, bool keep_dims)
-{
- LOGGER(l);
-
- ShapeSignature input_signature;
- ShapeSignature output_signature;
-
- auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
- if (circle_node->shape_signature().rank() > 0)
- input_signature = circle_node->shape_signature();
- else
- {
- input_signature.rank(circle_node->rank());
- for (uint32_t i = 0; i < circle_node->rank(); ++i)
- input_signature.dim(i) = circle_node->dim(i).value();
- }
-
- // If input rank is 0, it means that one of following case is occurred.
- // - Input is scalar : result is always scalar
- // - Input shape signature is not inferenced : cannot infer output shape signauture
- // Therefore, when input signature rank is 0, always return empty signature.
- if (input_signature.rank() == 0)
- return output_signature;
-
- // When reduction_indices is not constant
- auto reduction_indices = dynamic_cast<const luci::CircleConst *>(indices);
- if (reduction_indices == nullptr)
- {
- if (keep_dims)
- {
- // If keep_dims is true, rank is not changed.
- output_signature.rank(input_signature.rank());
- for (uint32_t i = 0; i < output_signature.rank(); ++i)
- output_signature.dim(i) = -1;
- }
- else
- {
- // There is no way to inference for this case.
- // Do nothing to return empty signature.
- INFO(l) << "[CircleShapeSignatureInferenceHelper] " << circle_node->name() << std::endl;
- INFO(l) << " reduced_signature : cannot infer because of non-constant node" << std::endl;
- }
-
- return output_signature;
- }
-
- std::vector<int32_t> reduction_values;
- if (reduction_indices->dtype() == loco::DataType::S32)
- {
- auto reduction_size = reduction_indices->size<loco::DataType::S32>();
- for (uint32_t i = 0; i < reduction_size; ++i)
- {
- int32_t axis = reduction_indices->at<loco::DataType::S32>(i);
- if (axis < 0)
- axis += input_signature.rank();
-
- if (!(0 <= axis && axis < static_cast<int32_t>(input_signature.rank())))
- INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
-
- reduction_values.push_back(axis);
- }
- }
- else if (reduction_indices->dtype() == loco::DataType::S64)
- {
- auto reduction_size = reduction_indices->size<loco::DataType::S64>();
- for (uint32_t i = 0; i < reduction_size; ++i)
- {
- int32_t axis = static_cast<int32_t>(reduction_indices->at<loco::DataType::S64>(i));
- if (axis < 0)
- axis += input_signature.rank();
-
- if (!(0 <= axis && axis < static_cast<int32_t>(input_signature.rank())))
- INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
-
- reduction_values.push_back(axis);
- }
- }
- else
- {
- INTERNAL_EXN("Wrong reduction axis type, Only INT32, INT64 supported.");
- }
-
- if (keep_dims)
- {
- output_signature.rank(input_signature.rank());
- for (uint32_t i = 0; i < input_signature.rank(); ++i)
- output_signature.dim(i) = input_signature.dim(i);
- for (uint32_t i = 0; i < reduction_values.size(); ++i)
- output_signature.dim(reduction_values.at(i)) = 1;
- }
- else
- {
- std::vector<bool> check_reduce(input_signature.rank(), false);
- for (uint32_t i = 0; i < reduction_values.size(); ++i)
- check_reduce.at(reduction_values.at(i)) = true;
-
- uint32_t reduce_cnt = 0;
- for (uint32_t i = 0; i < check_reduce.size(); ++i)
- if (check_reduce.at(i))
- ++reduce_cnt;
-
- output_signature.rank(input_signature.rank() - reduce_cnt);
- for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
- if (check_reduce.at(i) == false)
- output_signature.dim(j++) = input_signature.dim(i);
- }
-
- return output_signature;
-}
-
-ShapeSignature input_arg_signature(const luci::CircleNode *node, uint32_t index)
-{
- auto circle_input = loco::must_cast<luci::CircleNode *>(node->arg(index));
- return circle_input->shape_signature();
-}
-
-} // namespace ssinf
-
-} // namespace luci
*/
#include "luci/Service/CircleTypeInference.h"
+#include "CircleTypeInferenceHelper.h"
#include <luci/Log.h>
#include <loco.h>
-#include <loco/Service/TypeInference.h>
-
-#include <mio/circle/schema_generated.h>
-#include <oops/InternalExn.h>
#include <type_traits>
namespace
{
-circle::TensorType translateLocoTypeToCircle(loco::DataType dtype)
-{
- switch (dtype)
- {
- case loco::DataType::U8:
- return circle::TensorType_UINT8;
- // case loco::DataType::U16: unsupported
- // case loco::DataType::U32: unsupported
- // case loco::DataType::U64: unsupported
- case loco::DataType::S8:
- return circle::TensorType_INT8;
- case loco::DataType::S16:
- return circle::TensorType_INT16;
- case loco::DataType::S32:
- return circle::TensorType_INT32;
- case loco::DataType::S64:
- return circle::TensorType_INT64;
- case loco::DataType::FLOAT16:
- return circle::TensorType_FLOAT16;
- case loco::DataType::FLOAT32:
- return circle::TensorType_FLOAT32;
- // case loco::DataType::FLOAT64: unsupported
- case loco::DataType::BOOL:
- return circle::TensorType_BOOL;
- default:
- break;
- }
-
- INTERNAL_EXN_V("Invalid loco dtype", oops::to_uint32(dtype));
-}
-
-} // namespace
-
-namespace luci
-{
-
-circle::TensorType TypeInference::get(loco::Node *node)
-{
- assert(loco::dtype_known(node));
- return translateLocoTypeToCircle(loco::dtype_get(node));
-}
-
-} // namespace luci
-
-namespace
-{
-
bool inputs_dtype_ready(const luci::CircleNode *node)
{
for (uint32_t arity = 0; arity < node->arity(); ++arity)
{
- if (node->dtype() == loco::DataType::Unknown)
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->arg(arity));
+ if (input_node->dtype() == loco::DataType::Unknown)
return false;
}
* limitations under the License.
*/
-#include "luci/Service/CircleTypeInferenceHelper.h"
+#include "CircleTypeInferenceHelper.h"
+
+namespace luci
+{
+
+loco::DataType dtype_get(const loco::Node *node)
+{
+ assert(luci::dtype_known(node));
+ return loco::must_cast<const luci::CircleNode *>(node)->dtype();
+}
+
+bool dtype_known(const loco::Node *node)
+{
+ return loco::must_cast<const luci::CircleNode *>(node)->dtype() != loco::DataType::Unknown;
+}
+
+} // namespace luci
namespace luci
{
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
+#define __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco/IR/DataType.h>
+
+namespace luci
+{
+
+// NOTE Functions in this namespace will be removed after new inference
+// algorithms are fully implemented.
+
+// This is a temporary function for deprecating loco::dtype_get
+loco::DataType dtype_get(const loco::Node *node);
+
+// This is a temporary function for deprecating loco::dtype_known
+bool dtype_known(const loco::Node *node);
+
+} // namespace luci
+
+namespace luci
+{
+namespace tinf // Namespace for Type Inference
+{
+
+// Helper function will be added
+
+} // namespace tinf
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
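A minimal usage sketch of the helpers declared above (illustrative only; dtype_or_unknown is a hypothetical wrapper, not part of the patch): luci::dtype_get() asserts that the dtype is already resolved, so unsure callers can guard with luci::dtype_known() first.

#include "CircleTypeInferenceHelper.h" // declares luci::dtype_get / luci::dtype_known

// Hypothetical convenience wrapper: returns the resolved dtype of a node, or
// loco::DataType::Unknown when type inference has not reached it yet, instead
// of tripping the assert inside luci::dtype_get().
inline loco::DataType dtype_or_unknown(const loco::Node *node)
{
  return luci::dtype_known(node) ? luci::dtype_get(node) : loco::DataType::Unknown;
}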
*/
#include "luci/Service/CircleTypeInferenceRule.h"
+#include "CircleTypeInferenceHelper.h"
#include <luci/IR/CircleDialect.h>
#include <luci/IR/CircleNodeVisitor.h>
{
// TODO Given a tensor x of complex numbers, Abs operation returns a tensor of type float32 or
// float64.
- loco::DataType visit(const luci::CircleAbs *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleAbs *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleAdd *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleAdd *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleAddN *node) final
{
- auto dtype = loco::dtype_get(node->inputs(0));
+ auto dtype = luci::dtype_get(node->inputs(0));
for (uint32_t idx = 1; idx < node->arity(); ++idx)
{
- auto dtype_idx = loco::dtype_get(node->inputs(idx));
+ auto dtype_idx = luci::dtype_get(node->inputs(idx));
if (dtype != dtype_idx)
{
INTERNAL_EXN_V("ADD_N dtype not same as the first input: ", idx);
}
}
- return loco::dtype_get(node->inputs(0));
+ return luci::dtype_get(node->inputs(0));
}
loco::DataType visit(const luci::CircleArgMax *node) final { return node->output_type(); }
loco::DataType visit(const luci::CircleAveragePool2D *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleBatchMatMul *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleBatchToSpaceND *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleCast *node) final { return node->dtype(); }
- loco::DataType visit(const luci::CircleCeil *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleCeil *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleConcatenation *node) final
{
assert(node->numValues() > 0);
for (uint32_t i = 1; i < node->numValues(); ++i)
- assert(loco::dtype_get(node->values(i - 1)) == loco::dtype_get(node->values(i)));
+ assert(luci::dtype_get(node->values(i - 1)) == luci::dtype_get(node->values(i)));
- return loco::dtype_get(node->values(0));
+ return luci::dtype_get(node->values(0));
}
loco::DataType visit(const luci::CircleConst *node) final { return node->dtype(); }
loco::DataType visit(const luci::CircleConv2D *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleCos *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleCos *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleCustom *node) final
{
if (node->custom_code() == "BatchMatMulV2")
{
- return loco::dtype_get(node->inputs(0));
+ return luci::dtype_get(node->inputs(0));
}
return node->dtype();
}
loco::DataType visit(const luci::CircleDepthToSpace *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleDepthwiseConv2D *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleDequantize *) final { return loco::DataType::FLOAT32; }
- loco::DataType visit(const luci::CircleDiv *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleDiv *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleElu *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleEqual *) final { return loco::DataType::BOOL; }
- loco::DataType visit(const luci::CircleExp *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleExp *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleExpandDims *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
+ }
+
+ loco::DataType visit(const luci::CircleFakeQuant *node) final
+ {
+ return luci::dtype_get(node->inputs());
}
loco::DataType visit(const luci::CircleFill *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
- loco::DataType visit(const luci::CircleFloor *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleFloor *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleFloorDiv *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleFloorMod *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleFullyConnected *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleGather *node) final
{
- return loco::dtype_get(node->params());
+ return luci::dtype_get(node->params());
}
loco::DataType visit(const luci::CircleGatherNd *node) final
{
- return loco::dtype_get(node->params());
+ return luci::dtype_get(node->params());
}
loco::DataType visit(const luci::CircleGreater *) final { return loco::DataType::BOOL; }
{
// Type of If is not used. Just use input 0
assert(node->input_count() > 0);
- return loco::dtype_get(node->input(0));
+ return luci::dtype_get(node->input(0));
}
loco::DataType visit(const luci::CircleL2Normalize *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleL2Pool2D *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleLeakyRelu *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleLess *) final { return loco::DataType::BOOL; }
loco::DataType visit(const luci::CircleLocalResponseNormalization *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleLog *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleLog *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleLogicalAnd *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogicalNot *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogicalOr *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogistic *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogSoftmax *node) final
{
- return loco::dtype_get(node->logits());
+ return luci::dtype_get(node->logits());
}
loco::DataType visit(const luci::CircleMatrixDiag *node) final
{
- return loco::dtype_get(node->diagonal());
+ return luci::dtype_get(node->diagonal());
}
loco::DataType visit(const luci::CircleMatrixSetDiag *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleMaximum *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleMaximum *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleMaxPool2D *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleMean *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleMinimum *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleMinimum *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleMirrorPad *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleNeg *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleNeg *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleNonMaxSuppressionV4 *node) final
{
- return loco::dtype_get(node->boxes());
+ return luci::dtype_get(node->boxes());
}
loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final
{
- return loco::dtype_get(node->boxes());
+ return luci::dtype_get(node->boxes());
}
loco::DataType visit(const luci::CircleNotEqual *) final { return loco::DataType::BOOL; }
// Only support CirclePack with one or more inputs
assert(node->values_count() > 0);
- auto first_value_type = loco::dtype_get(node->values(0));
+ auto first_value_type = luci::dtype_get(node->values(0));
for (uint32_t i = 1; i < node->values_count(); ++i)
- assert(first_value_type == loco::dtype_get(node->values(i)));
+ assert(first_value_type == luci::dtype_get(node->values(i)));
return first_value_type;
}
- loco::DataType visit(const luci::CirclePad *node) final { return loco::dtype_get(node->input()); }
+ loco::DataType visit(const luci::CirclePad *node) final { return luci::dtype_get(node->input()); }
loco::DataType visit(const luci::CirclePadV2 *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CirclePow *node) final
{
// TODO make sure types cannot differ
- auto x_type = loco::dtype_get(node->x());
- auto y_type = loco::dtype_get(node->y());
+ auto x_type = luci::dtype_get(node->x());
+ auto y_type = luci::dtype_get(node->y());
if (x_type != y_type)
INTERNAL_EXN("Different datatype for x and y are not supported");
loco::DataType visit(const luci::CirclePRelu *node) final
{
- auto input_type = loco::dtype_get(node->input());
- auto alpha_type = loco::dtype_get(node->alpha());
+ auto input_type = luci::dtype_get(node->input());
+ auto alpha_type = luci::dtype_get(node->alpha());
if (input_type != alpha_type)
INTERNAL_EXN("Different datatype for input and alpha are not supported");
loco::DataType visit(const luci::CircleRange *node) final
{
- return loco::dtype_get(node->start());
+ return luci::dtype_get(node->start());
}
loco::DataType visit(const luci::CircleRank *) final { return loco::DataType::S32; }
- loco::DataType visit(const luci::CircleMul *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleMul *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleOneHot *node) final
{
- return loco::dtype_get(node->on_value());
+ return luci::dtype_get(node->on_value());
}
loco::DataType visit(const luci::CircleReduceAny *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReduceMax *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReduceMin *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReduceProd *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleRelu *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleRelu6 *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleReluN1To1 *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleReshape *node) final
{
- return loco::dtype_get(node->tensor());
+ return luci::dtype_get(node->tensor());
}
loco::DataType visit(const luci::CircleResizeBilinear *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleResizeNearestNeighbor *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReverseSequence *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReverseV2 *node) final
{
- return loco::dtype_get(node->tensor());
+ return luci::dtype_get(node->tensor());
}
- loco::DataType visit(const luci::CircleRound *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleRound *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleRsqrt *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleRsqrt *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleScatterNd *node) final
{
- return loco::dtype_get(node->updates());
+ return luci::dtype_get(node->updates());
}
loco::DataType visit(const luci::CircleSegmentSum *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSelect *node) final
{
- assert(loco::dtype_get(node->t()) == loco::dtype_get(node->e()));
- return loco::dtype_get(node->t());
+ assert(luci::dtype_get(node->t()) == luci::dtype_get(node->e()));
+ return luci::dtype_get(node->t());
}
loco::DataType visit(const luci::CircleSelectV2 *node) final
{
- assert(loco::dtype_get(node->t()) == loco::dtype_get(node->e()));
- return loco::dtype_get(node->t());
+ assert(luci::dtype_get(node->t()) == luci::dtype_get(node->e()));
+ return luci::dtype_get(node->t());
}
loco::DataType visit(const luci::CircleShape *node) final { return node->out_type(); }
- loco::DataType visit(const luci::CircleSin *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSin *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleSlice *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSoftmax *node) final
{
- return loco::dtype_get(node->logits());
+ return luci::dtype_get(node->logits());
}
loco::DataType visit(const luci::CircleSpaceToBatchND *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSpaceToDepth *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSparseToDense *node) final
{
- return loco::dtype_get(node->values());
+ return luci::dtype_get(node->values());
}
loco::DataType visit(const luci::CircleSplit *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSplitV *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleSqrt *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSqrt *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleSquare *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSquare *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleSquaredDifference *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleSqueeze *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleStridedSlice *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleSub *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSub *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleSum *node) final { return loco::dtype_get(node->input()); }
+ loco::DataType visit(const luci::CircleSum *node) final { return luci::dtype_get(node->input()); }
- loco::DataType visit(const luci::CircleTanh *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleTanh *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleTile *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleTopKV2 *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleTranspose *node) final
{
- return loco::dtype_get(node->a());
+ return luci::dtype_get(node->a());
}
loco::DataType visit(const luci::CircleTransposeConv *node) final
{
- return loco::dtype_get(node->outBackprop());
+ return luci::dtype_get(node->outBackprop());
}
loco::DataType visit(const luci::CircleUnidirectionalSequenceLSTM *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleUnique *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleUnpack *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleWhere *) final { return loco::DataType::S64; }
{
// Type of While is not used. Just use input 0
assert(node->input_count() > 0);
- return loco::dtype_get(node->input(0));
+ return luci::dtype_get(node->input(0));
}
loco::DataType visit(const luci::CircleZerosLike *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
// Circle Only
loco::DataType visit(const luci::CircleInstanceNorm *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
// Virtual
{
// We don't care for the type if from() is CircleOutputDummy or CircleOutputExclude
// from() type should match that of CircleOutput
- assert(output_dtype == loco::dtype_get(node->from()));
+ assert(output_dtype == luci::dtype_get(node->from()));
}
return output_dtype;
}
loco::DataType visit(const luci::CircleCustomOut *node) final { return node->dtype(); }
- loco::DataType visit(const luci::CircleIfOut *node) final
- {
- /**
- * @note IF operator type and shape are that of the "then" and "else"
- * Graph Outputs.
- */
- auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
- if (circle_if == nullptr)
- {
- INTERNAL_EXN("CircleIf IR is not configured correctly");
- }
-
- auto index = node->index();
- auto then_graph = circle_if->then_graph();
- auto else_graph = circle_if->else_graph();
- assert(then_graph != nullptr);
- assert(else_graph != nullptr);
-
- // shape and type are assumed to be same
- // these are checked at post_import_graph() in Import
- auto then_outputs = loco::output_nodes(then_graph);
- auto else_outputs = loco::output_nodes(else_graph);
- assert(then_outputs.size() == else_outputs.size());
- assert(index < static_cast<int32_t>(then_outputs.size()));
-
- auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
- auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
-
- auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
- auto else_graph_outputs = else_graph->outputs();
- assert(then_graph_outputs->size() == else_graph_outputs->size());
-
- auto then_graph_output = then_graph_outputs->at(then_out->index());
- auto else_graph_output = else_graph_outputs->at(else_out->index());
- (void)else_graph_output; // make compiler happy for unused variable warnings
- assert(then_graph_output->dtype() == else_graph_output->dtype());
-
- return then_graph_output->dtype();
- }
-
loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final
{
(void)node;
loco::DataType visit(const luci::CircleSplitOut *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSplitVOut *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleTopKV2Out *node) final
{
// First output is same as input
if (node->index() == 0)
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
    // Second output is always S32
assert(node->index() == 1);
return loco::DataType::S32;
{
if (node->index() == 0)
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
assert(node->index() == 1);
auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
loco::DataType visit(const luci::CircleUnpackOut *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleWhileOut *node) final
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TestGraph.h"
-#include <luci/Service/CircleTypeInferenceRule.h>
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleDialect.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/TypeInference.h>
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-TEST(CircleTypeInferenceRuleTest, minimal_with_CircleRelu)
-{
- // Create a simple network
- luci::test::TestGraph graph;
- auto relu_node = graph.append<luci::CircleRelu>(graph.input_node);
- graph.complete(relu_node);
-
- // set dtype for nodes; like setting them in import
- graph.input_node->dtype(loco::DataType::S32);
- relu_node->dtype(loco::DataType::S32);
- graph.output_node->dtype(loco::DataType::S32);
-
- luci::test::graph_input_dtype(graph.input_node);
- luci::test::graph_output_dtype(graph.output_node);
-
- // pre-check
- ASSERT_FALSE(loco::dtype_known(relu_node));
-
- // type inference
- luci::CircleTypeInferenceRule circle_rule;
- loco::CanonicalTypeInferenceRule canon_rule;
- loco::MultiDialectTypeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canon_rule);
- rules.bind(luci::CircleDialect::get(), &circle_rule);
-
- loco::apply(&rules).to(graph.g.get());
-
- // Verify
- ASSERT_TRUE(loco::dtype_known(relu_node));
- auto type = loco::dtype_get(relu_node);
- ASSERT_EQ(loco::DataType::S32, type);
-}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleAbs *)
+{
+ return _graph->nodes()->create<luci::CircleAbs>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Abs)
+{
+ auto g = loco::make_graph();
+ auto node_abs = g->nodes()->create<luci::CircleAbs>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_abs, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_abs = dynamic_cast<luci::CircleAbs *>(cloned);
+ ASSERT_NE(nullptr, cloned_abs);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleAdd *node)
+{
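+  // A node whose fused activation function is left UNDEFINED is not cloned (nullptr is returned).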
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleAdd>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+/**
+ * @note Function to test: Shape inference of two different input shapes
+ *
+ * Rank expansion to higher input side
+ * x(2,1,5) + y(3,5) --> x(2,1,5) + y(1,3,5)
+ * Do output shape inference like numpy
+ * x(2,1,5) + y(1,3,5) --> output(2,3,5)
+ * For each axis, dim value should be same OR one of them should be 1
+ */
+TEST(ShapeRuleTest, different_input_shapes_add)
+{
+ luci::CircleInput input1;
+ luci::CircleInput input2;
+ luci::CircleAdd add;
+
+ input1.shape({2, 1, 5});
+ input1.shape_status(luci::ShapeStatus::VALID);
+ input2.shape({3, 5});
+ input2.shape_status(luci::ShapeStatus::VALID);
+
+ add.x(&input1);
+ add.y(&input2);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&add, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(2, shape.dim(0).value());
+ ASSERT_EQ(3, shape.dim(1).value());
+ ASSERT_EQ(5, shape.dim(2).value());
+}
+
+TEST(CloneNodeTest, clone_Add)
+{
+ auto g = loco::make_graph();
+ auto node_add = g->nodes()->create<luci::CircleAdd>();
+ node_add->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_add, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_add = dynamic_cast<luci::CircleAdd *>(cloned);
+ ASSERT_NE(nullptr, cloned_add);
+ ASSERT_EQ(node_add->fusedActivationFunction(), cloned_add->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Add_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_add = g->nodes()->create<luci::CircleAdd>();
+ node_add->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_add, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleAddN *node)
+{
+ auto arity = node->arity();
+ return _graph->nodes()->create<luci::CircleAddN>(arity);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_AddN)
+{
+ auto g = loco::make_graph();
+ auto node_addn = g->nodes()->create<luci::CircleAddN>(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_addn, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_addn = dynamic_cast<luci::CircleAddN *>(cloned);
+ ASSERT_NE(nullptr, cloned_addn);
+ ASSERT_EQ(node_addn->arity(), cloned_addn->arity());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleArgMax *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleArgMax>();
+ if (cloned != nullptr)
+ cloned->output_type(node->output_type());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ArgMax)
+{
+ auto g = loco::make_graph();
+ auto node_argmax = g->nodes()->create<luci::CircleArgMax>();
+ node_argmax->output_type(loco::DataType::FLOAT32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_argmax, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_argmax = dynamic_cast<luci::CircleArgMax *>(cloned);
+ ASSERT_NE(nullptr, cloned_argmax);
+ ASSERT_EQ(node_argmax->output_type(), cloned_argmax->output_type());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleArgMin *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleArgMin>();
+ if (cloned != nullptr)
+ cloned->output_type(node->output_type());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ArgMin)
+{
+ auto g = loco::make_graph();
+ auto node_argmin = g->nodes()->create<luci::CircleArgMin>();
+ node_argmin->output_type(loco::DataType::FLOAT32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_argmin, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_argmin = dynamic_cast<luci::CircleArgMin *>(cloned);
+ ASSERT_NE(nullptr, cloned_argmin);
+ ASSERT_EQ(node_argmin->output_type(), cloned_argmin->output_type());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleAveragePool2D *node)
+{
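+  // A node with an UNDEFINED fused activation function or UNDEFINED padding is not cloned.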
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleAveragePool2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->filter()->h(node->filter()->h());
+ cloned->filter()->w(node->filter()->w());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, simple_valid_pad_avgpool2d)
+{
+ luci::CircleInput input;
+ luci::CircleAveragePool2D avgpool_2d;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ avgpool_2d.value(&input);
+ avgpool_2d.filter()->h(2);
+ avgpool_2d.filter()->w(2);
+ avgpool_2d.stride()->h(2);
+ avgpool_2d.stride()->w(2);
+ avgpool_2d.fusedActivationFunction(luci::FusedActFunc::NONE);
+ avgpool_2d.padding(luci::Padding::VALID);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
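+  // Expected shape with VALID padding: out = floor((in - filter) / stride) + 1
+  //   H: floor((4 - 2) / 2) + 1 = 2, W: floor((3 - 2) / 2) + 1 = 1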
+ ASSERT_TRUE(shape_inf_rule.infer(&avgpool_2d, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(1, shape.dim(2).value());
+ ASSERT_EQ(1, shape.dim(3).value());
+}
+
+TEST(ShapeRuleTest, simple_same_pad_avgpool2d)
+{
+ luci::CircleInput input;
+ luci::CircleAveragePool2D avgpool_2d;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ avgpool_2d.value(&input);
+ avgpool_2d.filter()->h(2);
+ avgpool_2d.filter()->w(2);
+ avgpool_2d.stride()->h(2);
+ avgpool_2d.stride()->w(2);
+ avgpool_2d.fusedActivationFunction(luci::FusedActFunc::NONE);
+ avgpool_2d.padding(luci::Padding::SAME);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
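+  // Expected shape with SAME padding: out = ceil(in / stride)
+  //   H: ceil(4 / 2) = 2, W: ceil(3 / 2) = 2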
+ ASSERT_TRUE(shape_inf_rule.infer(&avgpool_2d, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(2, shape.dim(2).value());
+ ASSERT_EQ(1, shape.dim(3).value());
+}
+
+TEST(CloneNodeTest, clone_AveragePool2D)
+{
+ auto g = loco::make_graph();
+ auto node_avgpool2d = g->nodes()->create<luci::CircleAveragePool2D>();
+ node_avgpool2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_avgpool2d->padding(luci::Padding::SAME);
+ node_avgpool2d->filter()->h(1);
+ node_avgpool2d->filter()->w(2);
+ node_avgpool2d->stride()->h(3);
+ node_avgpool2d->stride()->w(4);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_avgpool2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_avgpool2d = dynamic_cast<luci::CircleAveragePool2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_avgpool2d);
+ ASSERT_EQ(node_avgpool2d->fusedActivationFunction(), cloned_avgpool2d->fusedActivationFunction());
+ ASSERT_EQ(node_avgpool2d->padding(), cloned_avgpool2d->padding());
+ ASSERT_EQ(node_avgpool2d->filter()->h(), cloned_avgpool2d->filter()->h());
+ ASSERT_EQ(node_avgpool2d->filter()->w(), cloned_avgpool2d->filter()->w());
+ ASSERT_EQ(node_avgpool2d->stride()->h(), cloned_avgpool2d->stride()->h());
+ ASSERT_EQ(node_avgpool2d->stride()->w(), cloned_avgpool2d->stride()->w());
+}
+
+TEST(CloneNodeTest, clone_AveragePool2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_avgpool2d = g->nodes()->create<luci::CircleAveragePool2D>();
+ node_avgpool2d->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_avgpool2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_avgpool2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_AveragePool2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_avgpool2d = g->nodes()->create<luci::CircleAveragePool2D>();
+ node_avgpool2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_avgpool2d->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_avgpool2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleBCQFullyConnected *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleBCQFullyConnected>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->weights_hidden_size(node->weights_hidden_size());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BCQFullyConnected)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_fc->weights_hidden_size(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fc = dynamic_cast<luci::CircleBCQFullyConnected *>(cloned);
+ ASSERT_NE(nullptr, cloned_fc);
+ ASSERT_EQ(node_fc->fusedActivationFunction(), cloned_fc->fusedActivationFunction());
+ ASSERT_EQ(node_fc->weights_hidden_size(), cloned_fc->weights_hidden_size());
+}
+
+TEST(CloneNodeTest, clone_BCQFullyConnected_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleBCQGather *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleBCQGather>();
+ if (cloned != nullptr)
+ {
+ cloned->axis(node->axis());
+ cloned->input_hidden_size(node->input_hidden_size());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BCQGather)
+{
+ auto g = loco::make_graph();
+ auto node_gat = g->nodes()->create<luci::CircleBCQGather>();
+ node_gat->axis(3);
+ node_gat->input_hidden_size(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gat, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gat = dynamic_cast<luci::CircleBCQGather *>(cloned);
+ ASSERT_NE(nullptr, cloned_gat);
+ ASSERT_EQ(node_gat->axis(), cloned_gat->axis());
+ ASSERT_EQ(node_gat->input_hidden_size(), cloned_gat->input_hidden_size());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleBatchMatMul *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleBatchMatMul>();
+ if (cloned != nullptr)
+ {
+ cloned->adj_x(node->adj_x());
+ cloned->adj_y(node->adj_y());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BatchMatMul)
+{
+ auto g = loco::make_graph();
+ auto node_bmm = g->nodes()->create<luci::CircleBatchMatMul>();
+ node_bmm->adj_x(true);
+ node_bmm->adj_y(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_bmm, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_bmm = dynamic_cast<luci::CircleBatchMatMul *>(cloned);
+ ASSERT_NE(nullptr, cloned_bmm);
+ ASSERT_EQ(node_bmm->adj_x(), cloned_bmm->adj_x());
+ ASSERT_EQ(node_bmm->adj_y(), cloned_bmm->adj_y());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleBatchToSpaceND *)
+{
+ return _graph->nodes()->create<luci::CircleBatchToSpaceND>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BatchToSpaceND)
+{
+ auto g = loco::make_graph();
+ auto node_b2s = g->nodes()->create<luci::CircleBatchToSpaceND>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_b2s, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_b2s = dynamic_cast<luci::CircleBatchToSpaceND *>(cloned);
+ ASSERT_NE(nullptr, cloned_b2s);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCast *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleCast>();
+ if (cloned != nullptr)
+ {
+ cloned->in_data_type(node->in_data_type());
+ cloned->out_data_type(node->out_data_type());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Cast)
+{
+ auto g = loco::make_graph();
+ auto node_cast = g->nodes()->create<luci::CircleCast>();
+ node_cast->in_data_type(loco::DataType::U16);
+ node_cast->out_data_type(loco::DataType::S32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_cast, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_cast = dynamic_cast<luci::CircleCast *>(cloned);
+ ASSERT_NE(nullptr, cloned_cast);
+ ASSERT_EQ(node_cast->in_data_type(), cloned_cast->in_data_type());
+ ASSERT_EQ(node_cast->out_data_type(), cloned_cast->out_data_type());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCeil *)
+{
+ return _graph->nodes()->create<luci::CircleCeil>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Ceil)
+{
+ auto g = loco::make_graph();
+ auto node_ceil = g->nodes()->create<luci::CircleCeil>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ceil, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ceil = dynamic_cast<luci::CircleCeil *>(cloned);
+ ASSERT_NE(nullptr, cloned_ceil);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleConcatenation *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleConcatenation>(node->numValues());
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->axis(node->axis());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Concatenation)
+{
+ auto g = loco::make_graph();
+ auto node_concat = g->nodes()->create<luci::CircleConcatenation>(3);
+ node_concat->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_concat->axis(7);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_concat, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_concat = dynamic_cast<luci::CircleConcatenation *>(cloned);
+ ASSERT_NE(nullptr, cloned_concat);
+ ASSERT_EQ(node_concat->numValues(), cloned_concat->numValues());
+ ASSERT_EQ(node_concat->fusedActivationFunction(), cloned_concat->fusedActivationFunction());
+ ASSERT_EQ(node_concat->axis(), cloned_concat->axis());
+}
+
+TEST(CloneNodeTest, clone_Concatenation_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_concat = g->nodes()->create<luci::CircleConcatenation>(3);
+ node_concat->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_concat, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+#include <loco.h>
+#include <loco/IR/Graph.h>
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+
+namespace
+{
+
+template <loco::DataType T>
+void copy_values(const luci::CircleConst *node, luci::CircleConst *cloned)
+{
+ assert(T == node->dtype());
+ assert(T == cloned->dtype());
+
+ const auto size = node->size<T>();
+ cloned->size<T>(size);
+ for (uint32_t i = 0; i < size; i++)
+ cloned->at<T>(i) = node->at<T>(i);
+}
+
+luci::CircleConst *clone_circleconst(const luci::CircleConst *node, loco::Graph *graph)
+{
+ auto cloned = graph->nodes()->create<luci::CircleConst>();
+
+ if (cloned != nullptr)
+ {
+ // dtype/shape
+ cloned->dtype(node->dtype());
+ cloned->rank(node->rank());
+
+ // values
+ switch (node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ copy_values<loco::DataType::FLOAT32>(node, cloned);
+ break;
+
+ case loco::DataType::U8:
+ copy_values<loco::DataType::U8>(node, cloned);
+ break;
+
+ case loco::DataType::S8:
+ copy_values<loco::DataType::S8>(node, cloned);
+ break;
+
+ case loco::DataType::S16:
+ copy_values<loco::DataType::S16>(node, cloned);
+ break;
+
+ case loco::DataType::S32:
+ copy_values<loco::DataType::S32>(node, cloned);
+ break;
+
+ case loco::DataType::S64:
+ copy_values<loco::DataType::S64>(node, cloned);
+ break;
+
+ case loco::DataType::BOOL:
+ copy_values<loco::DataType::BOOL>(node, cloned);
+ break;
+
+ default:
+ throw oops::UserExn("Unsupported tensor dtype");
+ }
+ }
+
+ return cloned;
+}
+
+} // namespace
+
+namespace luci
+{
+
+luci::CircleConst *clone(luci::CircleConst *node)
+{
+ auto *cloned = clone_circleconst(node, node->graph());
+
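+  // clone_circleconst copies dtype, rank and element values; the remaining common
+  // attributes (shape dims, quantparam, sparsityparam, ...) are expected to be
+  // filled in by copy_common_attributes below.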
+ copy_common_attributes(node, cloned);
+
+ return cloned;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleConst *node)
+{
+ return clone_circleconst(node, _graph);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/Nodes/CircleConst.h"
+#include "luci/Service/CircleNodeClone.h"
+
+#include <loco.h>
+#include <loco/IR/Graph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+luci::CircleConst *new_const_s32(loco::Graph *g)
+{
+ // prepare source CircleConst
+ auto circle_const = g->nodes()->create<luci::CircleConst>();
+
+ const auto size = 2;
+
+ circle_const->dtype(loco::DataType::S32);
+ circle_const->rank(1);
+ circle_const->dim(0).set(size);
+ circle_const->shape_status(luci::ShapeStatus::VALID);
+
+ circle_const->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ circle_const->at<loco::DataType::S32>(i) = i;
+
+ // quantparam
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->scale = {1.0};
+ quantparam->zerop = {0};
+ quantparam->min = {-127.0};
+ quantparam->max = {127.0};
+ quantparam->quantized_dimension = 1;
+ circle_const->quantparam(std::move(quantparam));
+
+ // sparsityparam
+ auto sparam = std::make_unique<luci::SparsityParam>();
+ sparam->traversal_order = {1};
+ sparam->block_map = {1};
+ sparam->dim_metadata = {};
+ circle_const->sparsityparam(std::move(sparam));
+
+ return circle_const;
+}
+
+template <loco::DataType DT> luci::CircleConst *new_empty_const(loco::Graph *g)
+{
+ auto circle_const = g->nodes()->create<luci::CircleConst>();
+
+ const auto size = 0;
+
+ circle_const->dtype(DT);
+ circle_const->rank(1);
+ circle_const->dim(0).set(size);
+ circle_const->shape_status(luci::ShapeStatus::VALID);
+ circle_const->size<DT>(size);
+
+ return circle_const;
+}
+
+} // namespace
+
+TEST(CircleConstTest, clone)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_const_s32(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::S32, const_cloned->dtype());
+ ASSERT_EQ(1, const_cloned->rank());
+ ASSERT_EQ(2, const_cloned->dim(0).value());
+ ASSERT_EQ(2, const_cloned->size<loco::DataType::S32>());
+ ASSERT_EQ(0, const_cloned->at<loco::DataType::S32>(0));
+ ASSERT_EQ(1, const_cloned->at<loco::DataType::S32>(1));
+ ASSERT_NE(nullptr, const_cloned->quantparam());
+ ASSERT_NE(nullptr, const_cloned->sparsityparam());
+}
+
+TEST(CircleConstTest, clone_U8)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::U8>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::U8, const_cloned->dtype());
+}
+
+TEST(CircleConstTest, clone_S8)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::S8>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::S8, const_cloned->dtype());
+}
+
+TEST(CircleConstTest, clone_S64)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::S64>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::S64, const_cloned->dtype());
+}
+
+TEST(CircleConstTest, clone_BOOL)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::BOOL>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::BOOL, const_cloned->dtype());
+}
+
+TEST(CloneNodeTest, clone_Const)
+{
+ auto g = loco::make_graph();
+ auto node_const = new_const_s32(g.get());
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_const, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_const = dynamic_cast<luci::CircleConst *>(cloned);
+ ASSERT_NE(nullptr, cloned_const);
+ ASSERT_EQ(loco::DataType::S32, cloned_const->dtype());
+ ASSERT_EQ(1, cloned_const->rank());
+ ASSERT_EQ(2, cloned_const->dim(0).value());
+ ASSERT_EQ(2, cloned_const->size<loco::DataType::S32>());
+ ASSERT_EQ(0, cloned_const->at<loco::DataType::S32>(0));
+ ASSERT_EQ(1, cloned_const->at<loco::DataType::S32>(1));
+ ASSERT_NE(nullptr, cloned_const->quantparam());
+ ASSERT_NE(nullptr, cloned_const->sparsityparam());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleConv2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleConv2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ cloned->dilation()->h(node->dilation()->h());
+ cloned->dilation()->w(node->dilation()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Conv2D)
+{
+ auto g = loco::make_graph();
+ auto node_conv2d = g->nodes()->create<luci::CircleConv2D>();
+ node_conv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_conv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_conv2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_conv2d = dynamic_cast<luci::CircleConv2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_conv2d);
+ ASSERT_EQ(node_conv2d->fusedActivationFunction(), cloned_conv2d->fusedActivationFunction());
+ ASSERT_EQ(node_conv2d->padding(), cloned_conv2d->padding());
+}
+
+TEST(CloneNodeTest, clone_Conv2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_conv2d = g->nodes()->create<luci::CircleConv2D>();
+ node_conv2d->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_conv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_conv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_Conv2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_conv2d = g->nodes()->create<luci::CircleConv2D>();
+ node_conv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_conv2d->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_conv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCos *)
+{
+ return _graph->nodes()->create<luci::CircleCos>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Cos)
+{
+ auto g = loco::make_graph();
+ auto node_cos = g->nodes()->create<luci::CircleCos>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_cos, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_cos = dynamic_cast<luci::CircleCos *>(cloned);
+ ASSERT_NE(nullptr, cloned_cos);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCustom *node)
+{
+ uint32_t num_in = node->numInputs();
+ uint32_t num_out = node->numOutputs();
+ auto *cloned = _graph->nodes()->create<luci::CircleCustom>(num_in, num_out);
+ if (cloned != nullptr)
+ {
+ cloned->custom_options(node->custom_options());
+ cloned->custom_code(node->custom_code());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <vector>
+
+TEST(CloneNodeTest, clone_Custom)
+{
+ auto g = loco::make_graph();
+ auto node_custom = g->nodes()->create<luci::CircleCustom>(2, 3);
+ std::vector<uint8_t> options({0x55, 0x56, 0x57});
+ std::string code = "hello";
+ node_custom->custom_options(options);
+ node_custom->custom_code(code);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_custom, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_custom = dynamic_cast<luci::CircleCustom *>(cloned);
+ ASSERT_NE(nullptr, cloned_custom);
+ auto cloned_options = cloned_custom->custom_options();
+ ASSERT_EQ(options.size(), cloned_options.size());
+ auto size = options.size();
+ for (size_t s = 0; s < size; ++s)
+ ASSERT_EQ(options.at(s), cloned_options.at(s));
+ ASSERT_TRUE(node_custom->custom_code() == cloned_custom->custom_code());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCustomOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleCustomOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_CustomOut)
+{
+ auto g = loco::make_graph();
+ auto node_cout = g->nodes()->create<luci::CircleCustomOut>();
+ node_cout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_cout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_cout = dynamic_cast<luci::CircleCustomOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_cout);
+ ASSERT_EQ(node_cout->index(), cloned_cout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleDepthToSpace *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleDepthToSpace>();
+ if (cloned != nullptr)
+ cloned->block_size(node->block_size());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_DepthToSpace)
+{
+ auto g = loco::make_graph();
+ auto node_d2s = g->nodes()->create<luci::CircleDepthToSpace>();
+ node_d2s->block_size(32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_d2s, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_d2s = dynamic_cast<luci::CircleDepthToSpace *>(cloned);
+ ASSERT_NE(nullptr, cloned_d2s);
+ ASSERT_EQ(node_d2s->block_size(), cloned_d2s->block_size());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleDepthwiseConv2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleDepthwiseConv2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ cloned->depthMultiplier(node->depthMultiplier());
+ cloned->dilation()->h(node->dilation()->h());
+ cloned->dilation()->w(node->dilation()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_DepthwiseConv2D)
+{
+ auto g = loco::make_graph();
+ auto node_dwconv2d = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+ node_dwconv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_dwconv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dwconv2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_dwconv2d = dynamic_cast<luci::CircleDepthwiseConv2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_dwconv2d);
+ ASSERT_EQ(node_dwconv2d->fusedActivationFunction(), cloned_dwconv2d->fusedActivationFunction());
+ ASSERT_EQ(node_dwconv2d->padding(), cloned_dwconv2d->padding());
+}
+
+TEST(CloneNodeTest, clone_DepthwiseConv2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_dwconv2d = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+ node_dwconv2d->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_dwconv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dwconv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_DepthwiseConv2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_dwconv2d = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+ node_dwconv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_dwconv2d->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dwconv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleDequantize *)
+{
+ return _graph->nodes()->create<luci::CircleDequantize>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Dequantize)
+{
+ auto g = loco::make_graph();
+ auto node_dq = g->nodes()->create<luci::CircleDequantize>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_dq = dynamic_cast<luci::CircleDequantize *>(cloned);
+ ASSERT_NE(nullptr, cloned_dq);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleDiv *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleDiv>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Div)
+{
+ auto g = loco::make_graph();
+ auto node_div = g->nodes()->create<luci::CircleDiv>();
+ node_div->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_div, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_div = dynamic_cast<luci::CircleDiv *>(cloned);
+ ASSERT_NE(nullptr, cloned_div);
+ ASSERT_EQ(node_div->fusedActivationFunction(), cloned_div->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Div_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_div = g->nodes()->create<luci::CircleDiv>();
+ node_div->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_div, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleElu *)
+{
+ return _graph->nodes()->create<luci::CircleElu>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Elu)
+{
+ auto g = loco::make_graph();
+ auto node_elu = g->nodes()->create<luci::CircleElu>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_elu, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_elu = dynamic_cast<luci::CircleElu *>(cloned);
+ ASSERT_NE(nullptr, cloned_elu);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleEqual *)
+{
+ return _graph->nodes()->create<luci::CircleEqual>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Equal)
+{
+ auto g = loco::make_graph();
+ auto node_eq = g->nodes()->create<luci::CircleEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_eq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_eq = dynamic_cast<luci::CircleEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_eq);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleExp *)
+{
+ return _graph->nodes()->create<luci::CircleExp>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Exp)
+{
+ auto g = loco::make_graph();
+ auto node_exp = g->nodes()->create<luci::CircleExp>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_exp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_exp = dynamic_cast<luci::CircleExp *>(cloned);
+ ASSERT_NE(nullptr, cloned_exp);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleExpandDims *)
+{
+ return _graph->nodes()->create<luci::CircleExpandDims>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, simple_expand_dims)
+{
+ luci::CircleInput input;
+ luci::CircleConst axis;
+ luci::CircleExpandDims expand_dims;
+
+ input.shape({4, 3});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ axis.dtype(loco::DataType::S32);
+ axis.rank(0);
+ axis.size<loco::DataType::S32>(1);
+ axis.at<loco::DataType::S32>(0) = 1;
+ axis.shape_status(luci::ShapeStatus::VALID);
+
+ expand_dims.input(&input);
+ expand_dims.axis(&axis);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&expand_dims, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(4, shape.dim(0).value());
+ ASSERT_EQ(1, shape.dim(1).value());
+ ASSERT_EQ(3, shape.dim(2).value());
+}
+
+TEST(CloneNodeTest, clone_ExpandDims)
+{
+ auto g = loco::make_graph();
+ auto node_ed = g->nodes()->create<luci::CircleExpandDims>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ed, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ed = dynamic_cast<luci::CircleExpandDims *>(cloned);
+ ASSERT_NE(nullptr, cloned_ed);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFakeQuant *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleFakeQuant>();
+ if (cloned != nullptr)
+ {
+ cloned->min(node->min());
+ cloned->max(node->max());
+ cloned->num_bits(node->num_bits());
+ cloned->narrow_range(node->narrow_range());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FakeQuant)
+{
+ auto g = loco::make_graph();
+ auto node_fq = g->nodes()->create<luci::CircleFakeQuant>();
+ node_fq->min(1.0f);
+ node_fq->max(2.0f);
+ node_fq->num_bits(8);
+ node_fq->narrow_range(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fq = dynamic_cast<luci::CircleFakeQuant *>(cloned);
+ ASSERT_NE(nullptr, cloned_fq);
+ ASSERT_EQ(node_fq->min(), cloned_fq->min());
+ ASSERT_EQ(node_fq->max(), cloned_fq->max());
+ ASSERT_EQ(node_fq->num_bits(), cloned_fq->num_bits());
+ ASSERT_EQ(node_fq->narrow_range(), cloned_fq->narrow_range());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFill *)
+{
+ return _graph->nodes()->create<luci::CircleFill>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Fill)
+{
+ auto g = loco::make_graph();
+ auto node_fill = g->nodes()->create<luci::CircleFill>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fill, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fill = dynamic_cast<luci::CircleFill *>(cloned);
+ ASSERT_NE(nullptr, cloned_fill);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFloor *)
+{
+ return _graph->nodes()->create<luci::CircleFloor>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Floor)
+{
+ auto g = loco::make_graph();
+ auto node_floor = g->nodes()->create<luci::CircleFloor>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_floor, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_floor = dynamic_cast<luci::CircleFloor *>(cloned);
+ ASSERT_NE(nullptr, cloned_floor);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFloorDiv *)
+{
+ return _graph->nodes()->create<luci::CircleFloorDiv>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FloorDiv)
+{
+ auto g = loco::make_graph();
+ auto node_floordiv = g->nodes()->create<luci::CircleFloorDiv>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_floordiv, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_floordiv = dynamic_cast<luci::CircleFloorDiv *>(cloned);
+ ASSERT_NE(nullptr, cloned_floordiv);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFloorMod *)
+{
+ return _graph->nodes()->create<luci::CircleFloorMod>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FloorMod)
+{
+ auto g = loco::make_graph();
+ auto node_floormod = g->nodes()->create<luci::CircleFloorMod>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_floormod, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_floormod = dynamic_cast<luci::CircleFloorMod *>(cloned);
+ ASSERT_NE(nullptr, cloned_floormod);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleFullyConnected *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->weights_format() == luci::CircleFullyConnected::WeightsFormat::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleFullyConnected>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->weights_format(node->weights_format());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FullyConnected)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_fc->weights_format(luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fc = dynamic_cast<luci::CircleFullyConnected *>(cloned);
+ ASSERT_NE(nullptr, cloned_fc);
+ ASSERT_EQ(node_fc->fusedActivationFunction(), cloned_fc->fusedActivationFunction());
+ ASSERT_EQ(node_fc->weights_format(), cloned_fc->weights_format());
+}
+
+TEST(CloneNodeTest, clone_FullyConnected_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_fc->weights_format(luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_FullyConnected_wf_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_fc->weights_format(luci::CircleFullyConnected::WeightsFormat::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleGather *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleGather>();
+ if (cloned != nullptr)
+ cloned->axis(node->axis());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Gather)
+{
+ auto g = loco::make_graph();
+ auto node_gat = g->nodes()->create<luci::CircleGather>();
+ node_gat->axis(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gat, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gat = dynamic_cast<luci::CircleGather *>(cloned);
+ ASSERT_NE(nullptr, cloned_gat);
+ ASSERT_EQ(node_gat->axis(), cloned_gat->axis());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleGatherNd *)
+{
+ return _graph->nodes()->create<luci::CircleGatherNd>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <oops/InternalExn.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, gather_nd_simple)
+{
+ luci::CircleInput input;
+ luci::CircleConst indices_const;
+ luci::CircleGatherNd gather_nd;
+
+ input.shape({1, 4, 4, 3});
+ indices_const.shape({1, 2, 3});
+
+ input.shape_status(luci::ShapeStatus::VALID);
+ indices_const.shape_status(luci::ShapeStatus::VALID);
+
+ gather_nd.params(&input);
+ gather_nd.indices(&indices_const);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&gather_nd, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(3, shape.dim(2).value());
+}
+
+TEST(ShapeRuleTest, gather_nd_slices)
+{
+ luci::CircleInput input;
+ luci::CircleConst indices_const;
+ luci::CircleGatherNd gather_nd;
+
+ input.shape({1, 4, 4, 3});
+ indices_const.shape({1, 2, 1});
+
+ input.shape_status(luci::ShapeStatus::VALID);
+ indices_const.shape_status(luci::ShapeStatus::VALID);
+
+ gather_nd.params(&input);
+ gather_nd.indices(&indices_const);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&gather_nd, shape));
+ ASSERT_EQ(5, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(4, shape.dim(2).value());
+ ASSERT_EQ(4, shape.dim(3).value());
+ ASSERT_EQ(3, shape.dim(4).value());
+}
+
+TEST(ShapeRuleTest, gather_nd_NEG)
+{
+ luci::CircleInput input;
+ luci::CircleConst indices_const;
+ luci::CircleGatherNd gather_nd;
+
+ input.shape({1, 4, 4, 3});
+ indices_const.shape({1, 2, 5});
+
+ input.shape_status(luci::ShapeStatus::VALID);
+ indices_const.shape_status(luci::ShapeStatus::VALID);
+
+ gather_nd.params(&input);
+ gather_nd.indices(&indices_const);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_THROW(shape_inf_rule.infer(&gather_nd, shape), oops::InternalExn);
+}
+
+TEST(CloneNodeTest, clone_GatherNd)
+{
+ auto g = loco::make_graph();
+ auto node_gtnd = g->nodes()->create<luci::CircleGatherNd>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gtnd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gtnd = dynamic_cast<luci::CircleGatherNd *>(cloned);
+ ASSERT_NE(nullptr, cloned_gtnd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleGreater *)
+{
+ return _graph->nodes()->create<luci::CircleGreater>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Greater)
+{
+ auto g = loco::make_graph();
+ auto node_gt = g->nodes()->create<luci::CircleGreater>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gt, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gt = dynamic_cast<luci::CircleGreater *>(cloned);
+ ASSERT_NE(nullptr, cloned_gt);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleGreaterEqual *)
+{
+ return _graph->nodes()->create<luci::CircleGreaterEqual>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_GreaterEqual)
+{
+ auto g = loco::make_graph();
+ auto node_ge = g->nodes()->create<luci::CircleGreaterEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ge, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ge = dynamic_cast<luci::CircleGreaterEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_ge);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Service/CircleTypeInference.h>
+
+namespace
+{
+
+struct CircleIfOutGraphs
+{
+ loco::GraphOutput *then_graph_output;
+ loco::GraphOutput *else_graph_output;
+};
+
+CircleIfOutGraphs get_out_graphs(const luci::CircleIfOut *node)
+{
+ CircleIfOutGraphs ret_out;
+
+ /**
+ * @note The type and shape of the IF operator are those of the "then" and "else"
+ * graph outputs.
+ */
+ auto circle_if = loco::must_cast<const luci::CircleIf *>(node->input());
+
+ auto index = node->index();
+ auto then_graph = circle_if->then_graph();
+ auto else_graph = circle_if->else_graph();
+ assert(then_graph != nullptr);
+ assert(else_graph != nullptr);
+
+ // shape and type are assumed to be the same
+ // these are checked at post_import_graph() in Import
+ auto then_outputs = loco::output_nodes(then_graph);
+ auto else_outputs = loco::output_nodes(else_graph);
+ assert(then_outputs.size() == else_outputs.size());
+ assert(index < static_cast<int32_t>(then_outputs.size()));
+
+ auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
+ auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
+
+ auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
+ auto else_graph_outputs = else_graph->outputs();
+ assert(then_graph_outputs->size() == else_graph_outputs->size());
+
+ ret_out.then_graph_output = then_graph_outputs->at(then_out->index());
+ ret_out.else_graph_output = else_graph_outputs->at(else_out->index());
+
+ return ret_out;
+}
+
+} // namespace
+
+namespace luci
+{
+
+loco::TensorShape sinf::Algorithm::visit(const luci::CircleIfOut *node)
+{
+ auto graphs = get_out_graphs(node);
+ assert(*graphs.then_graph_output->shape() == *graphs.else_graph_output->shape());
+ return *graphs.then_graph_output->shape();
+}
+
+loco::DataType tinf::Algorithm::visit(const luci::CircleIfOut *node)
+{
+ auto graphs = get_out_graphs(node);
+ assert(graphs.then_graph_output->dtype() == graphs.else_graph_output->dtype());
+ return graphs.then_graph_output->dtype();
+}
+
+} // namespace luci
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <luci/Service/CircleShapeSignatureInference.h>
-
-namespace luci
-{
-
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleInput *node)
-{
- return node->shape_signature();
-}
-
-} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleInstanceNorm *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleInstanceNorm>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->epsilon(node->epsilon());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_InstanceNorm)
+{
+ auto g = loco::make_graph();
+ auto node_im = g->nodes()->create<luci::CircleInstanceNorm>();
+ node_im->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_im->epsilon(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_im, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_im = dynamic_cast<luci::CircleInstanceNorm *>(cloned);
+ ASSERT_NE(nullptr, cloned_im);
+ ASSERT_EQ(node_im->fusedActivationFunction(), cloned_im->fusedActivationFunction());
+ ASSERT_EQ(node_im->epsilon(), cloned_im->epsilon());
+}
+
+TEST(CloneNodeTest, clone_InstanceNorm_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_im = g->nodes()->create<luci::CircleInstanceNorm>();
+ node_im->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_im, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleL2Normalize *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleL2Normalize>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_L2Normalize)
+{
+ auto g = loco::make_graph();
+ auto node_l2n = g->nodes()->create<luci::CircleL2Normalize>();
+ node_l2n->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2n, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_l2n = dynamic_cast<luci::CircleL2Normalize *>(cloned);
+ ASSERT_NE(nullptr, cloned_l2n);
+ ASSERT_EQ(node_l2n->fusedActivationFunction(), cloned_l2n->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_L2Normalize_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_l2n = g->nodes()->create<luci::CircleL2Normalize>();
+ node_l2n->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2n, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleL2Pool2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleL2Pool2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->filter()->h(node->filter()->h());
+ cloned->filter()->w(node->filter()->w());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_L2Pool2D)
+{
+ auto g = loco::make_graph();
+ auto node_l2p = g->nodes()->create<luci::CircleL2Pool2D>();
+ node_l2p->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_l2p->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2p, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_l2p = dynamic_cast<luci::CircleL2Pool2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_l2p);
+ ASSERT_EQ(node_l2p->fusedActivationFunction(), cloned_l2p->fusedActivationFunction());
+ ASSERT_EQ(node_l2p->padding(), cloned_l2p->padding());
+}
+
+TEST(CloneNodeTest, clone_L2Pool2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_l2p = g->nodes()->create<luci::CircleL2Pool2D>();
+ node_l2p->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_l2p->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2p, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_L2Pool2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_l2p = g->nodes()->create<luci::CircleL2Pool2D>();
+ node_l2p->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_l2p->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2p, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLeakyRelu *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleLeakyRelu>();
+ if (cloned != nullptr)
+ cloned->alpha(node->alpha());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LeakyRelu)
+{
+ auto g = loco::make_graph();
+ auto node_lr = g->nodes()->create<luci::CircleLeakyRelu>();
+ node_lr->alpha(1.2f);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_lr, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_lr = dynamic_cast<luci::CircleLeakyRelu *>(cloned);
+ ASSERT_NE(nullptr, cloned_lr);
+ ASSERT_EQ(node_lr->alpha(), cloned_lr->alpha());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLess *)
+{
+ return _graph->nodes()->create<luci::CircleLess>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Less)
+{
+ auto g = loco::make_graph();
+ auto node_less = g->nodes()->create<luci::CircleLess>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_less, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_less = dynamic_cast<luci::CircleLess *>(cloned);
+ ASSERT_NE(nullptr, cloned_less);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLessEqual *)
+{
+ return _graph->nodes()->create<luci::CircleLessEqual>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LessEqual)
+{
+ auto g = loco::make_graph();
+ auto node_le = g->nodes()->create<luci::CircleLessEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_le, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_le = dynamic_cast<luci::CircleLessEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_le);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLocalResponseNormalization *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleLocalResponseNormalization>();
+ if (cloned != nullptr)
+ {
+ cloned->radius(node->radius());
+ cloned->bias(node->bias());
+ cloned->alpha(node->alpha());
+ cloned->beta(node->beta());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LocalResponseNormalization)
+{
+ auto g = loco::make_graph();
+ auto node_lrn = g->nodes()->create<luci::CircleLocalResponseNormalization>();
+ node_lrn->radius(32);
+ node_lrn->bias(1.2f);
+ node_lrn->alpha(3.4f);
+ node_lrn->beta(5.7f);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_lrn, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_lrn = dynamic_cast<luci::CircleLocalResponseNormalization *>(cloned);
+ ASSERT_NE(nullptr, cloned_lrn);
+ ASSERT_EQ(node_lrn->radius(), cloned_lrn->radius());
+ ASSERT_EQ(node_lrn->bias(), cloned_lrn->bias());
+ ASSERT_EQ(node_lrn->alpha(), cloned_lrn->alpha());
+ ASSERT_EQ(node_lrn->beta(), cloned_lrn->beta());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLog *)
+{
+ return _graph->nodes()->create<luci::CircleLog>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Log)
+{
+ auto g = loco::make_graph();
+ auto node_log = g->nodes()->create<luci::CircleLog>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_log, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_log = dynamic_cast<luci::CircleLog *>(cloned);
+ ASSERT_NE(nullptr, cloned_log);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLogSoftmax *)
+{
+ return _graph->nodes()->create<luci::CircleLogSoftmax>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogSoftmax)
+{
+ auto g = loco::make_graph();
+ auto node_logs = g->nodes()->create<luci::CircleLogSoftmax>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_logs, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_logs = dynamic_cast<luci::CircleLogSoftmax *>(cloned);
+ ASSERT_NE(nullptr, cloned_logs);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLogicalAnd *)
+{
+ return _graph->nodes()->create<luci::CircleLogicalAnd>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogicalAnd)
+{
+ auto g = loco::make_graph();
+ auto node_logand = g->nodes()->create<luci::CircleLogicalAnd>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_logand, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_logand = dynamic_cast<luci::CircleLogicalAnd *>(cloned);
+ ASSERT_NE(nullptr, cloned_logand);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLogicalNot *)
+{
+ return _graph->nodes()->create<luci::CircleLogicalNot>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogicalNot)
+{
+ auto g = loco::make_graph();
+ auto node_lognot = g->nodes()->create<luci::CircleLogicalNot>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_lognot, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_lognot = dynamic_cast<luci::CircleLogicalNot *>(cloned);
+ ASSERT_NE(nullptr, cloned_lognot);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLogicalOr *)
+{
+ return _graph->nodes()->create<luci::CircleLogicalOr>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogicalOr)
+{
+ auto g = loco::make_graph();
+ auto node_logor = g->nodes()->create<luci::CircleLogicalOr>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_logor, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_logor = dynamic_cast<luci::CircleLogicalOr *>(cloned);
+ ASSERT_NE(nullptr, cloned_logor);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleLogistic *)
+{
+ return _graph->nodes()->create<luci::CircleLogistic>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Logistic)
+{
+ auto g = loco::make_graph();
+ auto node_log = g->nodes()->create<luci::CircleLogistic>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_log, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_log = dynamic_cast<luci::CircleLogistic *>(cloned);
+ ASSERT_NE(nullptr, cloned_log);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMatrixDiag *)
+{
+ return _graph->nodes()->create<luci::CircleMatrixDiag>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MatrixDiag)
+{
+ auto g = loco::make_graph();
+ auto node_md = g->nodes()->create<luci::CircleMatrixDiag>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_md, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_md = dynamic_cast<luci::CircleMatrixDiag *>(cloned);
+ ASSERT_NE(nullptr, cloned_md);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMatrixSetDiag *)
+{
+ return _graph->nodes()->create<luci::CircleMatrixSetDiag>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MatrixSetDiag)
+{
+ auto g = loco::make_graph();
+ auto node_msd = g->nodes()->create<luci::CircleMatrixSetDiag>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_msd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_msd = dynamic_cast<luci::CircleMatrixSetDiag *>(cloned);
+ ASSERT_NE(nullptr, cloned_msd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMaxPool2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleMaxPool2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->filter()->h(node->filter()->h());
+ cloned->filter()->w(node->filter()->w());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MaxPool2D)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMaxPool2D>();
+ node_mp->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_mp->padding(luci::Padding::SAME);
+ node_mp->filter()->h(1);
+ node_mp->filter()->w(2);
+ node_mp->stride()->h(3);
+ node_mp->stride()->w(4);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mp = dynamic_cast<luci::CircleMaxPool2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_mp);
+ ASSERT_EQ(node_mp->fusedActivationFunction(), cloned_mp->fusedActivationFunction());
+ ASSERT_EQ(node_mp->padding(), cloned_mp->padding());
+ ASSERT_EQ(node_mp->filter()->h(), cloned_mp->filter()->h());
+ ASSERT_EQ(node_mp->filter()->w(), cloned_mp->filter()->w());
+ ASSERT_EQ(node_mp->stride()->h(), cloned_mp->stride()->h());
+ ASSERT_EQ(node_mp->stride()->w(), cloned_mp->stride()->w());
+}
+
+TEST(CloneNodeTest, clone_MaxPool2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMaxPool2D>();
+ node_mp->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_mp->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_MaxPool2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMaxPool2D>();
+ node_mp->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_mp->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMaximum *)
+{
+ return _graph->nodes()->create<luci::CircleMaximum>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Maximum)
+{
+ auto g = loco::make_graph();
+ auto node_max = g->nodes()->create<luci::CircleMaximum>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_max, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_max = dynamic_cast<luci::CircleMaximum *>(cloned);
+ ASSERT_NE(nullptr, cloned_max);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"

namespace luci
{

-ShapeSignature ssinf::Algorithm::visit(const luci::CircleMean *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleMean *node)
{
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+ auto *cloned = _graph->nodes()->create<luci::CircleMean>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
}

} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Mean)
+{
+ auto g = loco::make_graph();
+ auto node_mean = g->nodes()->create<luci::CircleMean>();
+ node_mean->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mean, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mean = dynamic_cast<luci::CircleMean *>(cloned);
+ ASSERT_NE(nullptr, cloned_mean);
+ ASSERT_EQ(node_mean->keep_dims(), cloned_mean->keep_dims());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMinimum *)
+{
+ return _graph->nodes()->create<luci::CircleMinimum>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Minimum)
+{
+ auto g = loco::make_graph();
+ auto node_min = g->nodes()->create<luci::CircleMinimum>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_min, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_min = dynamic_cast<luci::CircleMinimum *>(cloned);
+ ASSERT_NE(nullptr, cloned_min);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMirrorPad *node)
+{
+ if (node->mode() == luci::MirrorPadMode::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleMirrorPad>();
+ if (cloned != nullptr)
+ cloned->mode(node->mode());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MirrorPad)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMirrorPad>();
+ node_mp->mode(luci::MirrorPadMode::REFLECT);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mp = dynamic_cast<luci::CircleMirrorPad *>(cloned);
+ ASSERT_NE(nullptr, cloned_mp);
+ ASSERT_EQ(node_mp->mode(), cloned_mp->mode());
+}
+
+TEST(CloneNodeTest, clone_MirrorPad_mode_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMirrorPad>();
+ node_mp->mode(luci::MirrorPadMode::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleMul *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleMul>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Mul)
+{
+ auto g = loco::make_graph();
+ auto node_mul = g->nodes()->create<luci::CircleMul>();
+ node_mul->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mul, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mul = dynamic_cast<luci::CircleMul *>(cloned);
+ ASSERT_NE(nullptr, cloned_mul);
+ ASSERT_EQ(node_mul->fusedActivationFunction(), cloned_mul->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Mul_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mul = g->nodes()->create<luci::CircleMul>();
+ node_mul->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mul, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNeg *)
+{
+ return _graph->nodes()->create<luci::CircleNeg>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Neg)
+{
+ auto g = loco::make_graph();
+ auto node_neg = g->nodes()->create<luci::CircleNeg>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_neg, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_neg = dynamic_cast<luci::CircleNeg *>(cloned);
+ ASSERT_NE(nullptr, cloned_neg);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNonMaxSuppressionV4 *)
+{
+ return _graph->nodes()->create<luci::CircleNonMaxSuppressionV4>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV4)
+{
+ auto g = loco::make_graph();
+ auto node_nms = g->nodes()->create<luci::CircleNonMaxSuppressionV4>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nms, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nms = dynamic_cast<luci::CircleNonMaxSuppressionV4 *>(cloned);
+ ASSERT_NE(nullptr, cloned_nms);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNonMaxSuppressionV4Out *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleNonMaxSuppressionV4Out>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV4Out)
+{
+ auto g = loco::make_graph();
+ auto node_nout = g->nodes()->create<luci::CircleNonMaxSuppressionV4Out>();
+ node_nout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nout = dynamic_cast<luci::CircleNonMaxSuppressionV4Out *>(cloned);
+ ASSERT_NE(nullptr, cloned_nout);
+ ASSERT_EQ(node_nout->index(), cloned_nout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNonMaxSuppressionV5 *)
+{
+ return _graph->nodes()->create<luci::CircleNonMaxSuppressionV5>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV5)
+{
+ auto g = loco::make_graph();
+ auto node_nms = g->nodes()->create<luci::CircleNonMaxSuppressionV5>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nms, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nms = dynamic_cast<luci::CircleNonMaxSuppressionV5 *>(cloned);
+ ASSERT_NE(nullptr, cloned_nms);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNonMaxSuppressionV5Out *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleNonMaxSuppressionV5Out>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV5Out)
+{
+ auto g = loco::make_graph();
+ auto node_nout = g->nodes()->create<luci::CircleNonMaxSuppressionV5Out>();
+ node_nout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nout = dynamic_cast<luci::CircleNonMaxSuppressionV5Out *>(cloned);
+ ASSERT_NE(nullptr, cloned_nout);
+ ASSERT_EQ(node_nout->index(), cloned_nout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNotEqual *)
+{
+ return _graph->nodes()->create<luci::CircleNotEqual>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NotEqual)
+{
+ auto g = loco::make_graph();
+ auto node_ne = g->nodes()->create<luci::CircleNotEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ne, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ne = dynamic_cast<luci::CircleNotEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_ne);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleOneHot *node)
+{
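+  // OneHot's only node-local attribute is axis; copy it to the clone.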
+ auto *cloned = _graph->nodes()->create<luci::CircleOneHot>();
+ if (cloned != nullptr)
+ cloned->axis(node->axis());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_OneHot)
+{
+ auto g = loco::make_graph();
+ auto node_oh = g->nodes()->create<luci::CircleOneHot>();
+ node_oh->axis(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_oh, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_oh = dynamic_cast<luci::CircleOneHot *>(cloned);
+ ASSERT_NE(nullptr, cloned_oh);
+ ASSERT_EQ(node_oh->axis(), cloned_oh->axis());
+}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <luci/Service/CircleShapeSignatureInference.h>
-
-namespace luci
-{
-
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutput *node)
-{
- return input_arg_signature(node, 0);
-}
-
-} // namespace luci
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutputDummy *) { return ShapeSignature(); }
+luci::CircleNode *CloneNode::visit(const luci::CircleOutputDummy *)
+{
+ return _graph->nodes()->create<luci::CircleOutputDummy>();
+}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_OutputDummy)
+{
+ auto g = loco::make_graph();
+ auto node_dummy = g->nodes()->create<luci::CircleOutputDummy>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dummy, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_dummy = dynamic_cast<luci::CircleOutputDummy *>(cloned);
+ ASSERT_NE(nullptr, cloned_dummy);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutputExclude *)
+luci::CircleNode *CloneNode::visit(const luci::CircleOutputExclude *)
{
- return ShapeSignature();
+ return _graph->nodes()->create<luci::CircleOutputExclude>();
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_OutputExclude)
+{
+ auto g = loco::make_graph();
+ auto node_outex = g->nodes()->create<luci::CircleOutputExclude>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_outex, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_outex = dynamic_cast<luci::CircleOutputExclude *>(cloned);
+ ASSERT_NE(nullptr, cloned_outex);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CirclePRelu *)
+{
+ return _graph->nodes()->create<luci::CirclePRelu>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_PRelu)
+{
+ auto g = loco::make_graph();
+ auto node_pr = g->nodes()->create<luci::CirclePRelu>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pr, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pr = dynamic_cast<luci::CirclePRelu *>(cloned);
+ ASSERT_NE(nullptr, cloned_pr);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CirclePack *node)
+{
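+  // CirclePack needs its input count at construction time; axis is copied as a plain attribute.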
+ auto *cloned = _graph->nodes()->create<luci::CirclePack>(node->values_count());
+ if (cloned != nullptr)
+ cloned->axis(node->axis());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Pack)
+{
+ auto g = loco::make_graph();
+ auto node_pack = g->nodes()->create<luci::CirclePack>(3);
+ node_pack->axis(7);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pack, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pack = dynamic_cast<luci::CirclePack *>(cloned);
+ ASSERT_NE(nullptr, cloned_pack);
+ ASSERT_EQ(node_pack->values_count(), cloned_pack->values_count());
+ ASSERT_EQ(node_pack->axis(), cloned_pack->axis());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CirclePad *)
+{
+ return _graph->nodes()->create<luci::CirclePad>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Pad)
+{
+ auto g = loco::make_graph();
+ auto node_pad = g->nodes()->create<luci::CirclePad>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pad, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pad = dynamic_cast<luci::CirclePad *>(cloned);
+ ASSERT_NE(nullptr, cloned_pad);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CirclePadV2 *)
+{
+ return _graph->nodes()->create<luci::CirclePadV2>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_PadV2)
+{
+ auto g = loco::make_graph();
+ auto node_pad = g->nodes()->create<luci::CirclePadV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pad, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pad = dynamic_cast<luci::CirclePadV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_pad);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CirclePow *)
+{
+ return _graph->nodes()->create<luci::CirclePow>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Pow)
+{
+ auto g = loco::make_graph();
+ auto node_pow = g->nodes()->create<luci::CirclePow>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pow, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pow = dynamic_cast<luci::CirclePow *>(cloned);
+ ASSERT_NE(nullptr, cloned_pow);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleRange *)
+{
+ return _graph->nodes()->create<luci::CircleRange>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Range)
+{
+ auto g = loco::make_graph();
+ auto node_range = g->nodes()->create<luci::CircleRange>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_range, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_range = dynamic_cast<luci::CircleRange *>(cloned);
+ ASSERT_NE(nullptr, cloned_range);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleRank *)
+{
+ return _graph->nodes()->create<luci::CircleRank>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Rank)
+{
+ auto g = loco::make_graph();
+ auto node_rank = g->nodes()->create<luci::CircleRank>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rank, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rank = dynamic_cast<luci::CircleRank *>(cloned);
+ ASSERT_NE(nullptr, cloned_rank);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceAny *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleReduceAny *node)
{
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
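+  // keep_dims is the only attribute to preserve when cloning the reduction node.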
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceAny>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceAny)
+{
+ auto g = loco::make_graph();
+ auto node_ra = g->nodes()->create<luci::CircleReduceAny>();
+ node_ra->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ra, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ra = dynamic_cast<luci::CircleReduceAny *>(cloned);
+ ASSERT_NE(nullptr, cloned_ra);
+ ASSERT_EQ(node_ra->keep_dims(), cloned_ra->keep_dims());
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceMax *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleReduceMax *node)
{
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceMax>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceMax)
+{
+ auto g = loco::make_graph();
+ auto node_rmax = g->nodes()->create<luci::CircleReduceMax>();
+ node_rmax->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rmax, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rmax = dynamic_cast<luci::CircleReduceMax *>(cloned);
+ ASSERT_NE(nullptr, cloned_rmax);
+ ASSERT_EQ(node_rmax->keep_dims(), cloned_rmax->keep_dims());
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceMin *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleReduceMin *node)
{
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceMin>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceMin)
+{
+ auto g = loco::make_graph();
+ auto node_rmin = g->nodes()->create<luci::CircleReduceMin>();
+ node_rmin->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rmin, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rmin = dynamic_cast<luci::CircleReduceMin *>(cloned);
+ ASSERT_NE(nullptr, cloned_rmin);
+ ASSERT_EQ(node_rmin->keep_dims(), cloned_rmin->keep_dims());
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceProd *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleReduceProd *node)
{
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceProd>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceProd)
+{
+ auto g = loco::make_graph();
+ auto node_rp = g->nodes()->create<luci::CircleReduceProd>();
+ node_rp->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rp = dynamic_cast<luci::CircleReduceProd *>(cloned);
+ ASSERT_NE(nullptr, cloned_rp);
+ ASSERT_EQ(node_rp->keep_dims(), cloned_rp->keep_dims());
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleRelu *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleRelu *)
{
- return input_arg_signature(node, 0);
+ return _graph->nodes()->create<luci::CircleRelu>();
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Service/CircleTypeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, simple_relu)
+{
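+  // Relu is shape-preserving, so the inferred shape must equal the 3x4 input shape.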
+ luci::CircleInput input;
+ luci::CircleRelu relu;
+
+ input.shape({3, 4});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ relu.features(&input);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&relu, shape));
+ ASSERT_EQ(2, shape.rank());
+ ASSERT_EQ(3, shape.dim(0).value());
+ ASSERT_EQ(4, shape.dim(1).value());
+}
+
+TEST(DataTypeRuleTest, simple_relu)
+{
+ luci::CircleInput input;
+ luci::CircleRelu relu;
+
+ input.dtype(loco::DataType::S32);
+
+ relu.features(&input);
+
+ loco::DataType dtype;
+ luci::tinf::Rule type_inf_rule;
+
+ ASSERT_TRUE(type_inf_rule.infer(&relu, dtype));
+ ASSERT_EQ(loco::DataType::S32, dtype);
+}
+
+TEST(CloneNodeTest, clone_Relu)
+{
+ auto g = loco::make_graph();
+ auto node_relu = g->nodes()->create<luci::CircleRelu>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_relu, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_relu = dynamic_cast<luci::CircleRelu *>(cloned);
+ ASSERT_NE(nullptr, cloned_relu);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleRelu6 *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleRelu6 *)
{
- return input_arg_signature(node, 0);
+ return _graph->nodes()->create<luci::CircleRelu6>();
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Relu6)
+{
+ auto g = loco::make_graph();
+ auto node_relu6 = g->nodes()->create<luci::CircleRelu6>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_relu6, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_relu6 = dynamic_cast<luci::CircleRelu6 *>(cloned);
+ ASSERT_NE(nullptr, cloned_relu6);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
*/
-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"
namespace luci
{
-ShapeSignature ssinf::Algorithm::visit(const luci::CircleReluN1To1 *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleReluN1To1 *)
{
- return input_arg_signature(node, 0);
+ return _graph->nodes()->create<luci::CircleReluN1To1>();
}
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReluN1To1)
+{
+ auto g = loco::make_graph();
+ auto node_relun1 = g->nodes()->create<luci::CircleReluN1To1>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_relun1, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_relun1 = dynamic_cast<luci::CircleReluN1To1 *>(cloned);
+ ASSERT_NE(nullptr, cloned_relun1);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleReshape *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleReshape>();
+ if (cloned != nullptr)
+ {
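+    // Replicate the compile-time shape attribute: same rank and the same value for each dimension.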
+ uint32_t rank = node->newShape()->rank();
+ cloned->newShape()->rank(rank);
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ cloned->newShape()->dim(r) = node->newShape()->dim(r);
+ }
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Reshape)
+{
+ auto g = loco::make_graph();
+ auto node_reshape = g->nodes()->create<luci::CircleReshape>();
+ node_reshape->newShape()->rank(2);
+ node_reshape->newShape()->dim(0) = 3;
+ node_reshape->newShape()->dim(1) = 4;
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_reshape, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_reshape = dynamic_cast<luci::CircleReshape *>(cloned);
+ ASSERT_NE(nullptr, cloned_reshape);
+ ASSERT_EQ(node_reshape->newShape()->rank(), cloned_reshape->newShape()->rank());
+ ASSERT_EQ(node_reshape->newShape()->dim(0), cloned_reshape->newShape()->dim(0));
+ ASSERT_EQ(node_reshape->newShape()->dim(1), cloned_reshape->newShape()->dim(1));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleResizeBilinear *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleResizeBilinear>();
+ if (cloned != nullptr)
+ {
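+    // align_corners and half_pixel_centers are plain attributes; both must survive the clone.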
+ cloned->align_corners(node->align_corners());
+ cloned->half_pixel_centers(node->half_pixel_centers());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, resize_bilinear_simple)
+{
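+  // The const size input (16x16) determines the output height and width; batch and channels follow the input.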
+ luci::CircleInput input;
+ luci::CircleConst rb_size;
+ luci::CircleResizeBilinear rb;
+
+ input.shape({1, 4, 4, 3});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ rb_size.dtype(loco::DataType::S32);
+ rb_size.rank(1);
+ rb_size.dim(0).set(2);
+ rb_size.size<loco::DataType::S32>(2);
+ rb_size.at<loco::DataType::S32>(0) = 16;
+ rb_size.at<loco::DataType::S32>(1) = 16;
+ rb_size.shape_status(luci::ShapeStatus::VALID);
+
+ rb.input(&input);
+ rb.size(&rb_size);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&rb, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(16, shape.dim(1).value());
+ ASSERT_EQ(16, shape.dim(2).value());
+ ASSERT_EQ(3, shape.dim(3).value());
+}
+
+TEST(CloneNodeTest, clone_ResizeBilinear)
+{
+ auto g = loco::make_graph();
+ auto node_rb = g->nodes()->create<luci::CircleResizeBilinear>();
+ node_rb->align_corners(true);
+ node_rb->half_pixel_centers(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rb, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rb = dynamic_cast<luci::CircleResizeBilinear *>(cloned);
+ ASSERT_NE(nullptr, cloned_rb);
+ ASSERT_EQ(node_rb->align_corners(), cloned_rb->align_corners());
+ ASSERT_EQ(node_rb->half_pixel_centers(), cloned_rb->half_pixel_centers());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleResizeNearestNeighbor *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleResizeNearestNeighbor>();
+ if (cloned != nullptr)
+ cloned->align_corners(node->align_corners());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, resize_nearest_neighbor_simple)
+{
+ luci::CircleInput input;
+ luci::CircleConst rnn_size;
+ luci::CircleResizeNearestNeighbor rnn;
+
+ input.shape({1, 4, 4, 3});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ rnn_size.dtype(loco::DataType::S32);
+ rnn_size.rank(1);
+ rnn_size.dim(0).set(2);
+ rnn_size.size<loco::DataType::S32>(2);
+ rnn_size.at<loco::DataType::S32>(0) = 16;
+ rnn_size.at<loco::DataType::S32>(1) = 16;
+ rnn_size.shape_status(luci::ShapeStatus::VALID);
+
+ rnn.input(&input);
+ rnn.size(&rnn_size);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&rnn, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(16, shape.dim(1).value());
+ ASSERT_EQ(16, shape.dim(2).value());
+ ASSERT_EQ(3, shape.dim(3).value());
+}
+
+TEST(CloneNodeTest, clone_ResizeNearestNeighbor)
+{
+ auto g = loco::make_graph();
+ auto node_rnn = g->nodes()->create<luci::CircleResizeNearestNeighbor>();
+ node_rnn->align_corners(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rnn, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rnn = dynamic_cast<luci::CircleResizeNearestNeighbor *>(cloned);
+ ASSERT_NE(nullptr, cloned_rnn);
+ ASSERT_EQ(node_rnn->align_corners(), cloned_rnn->align_corners());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleReverseSequence *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleReverseSequence>();
+ if (cloned != nullptr)
+ {
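+    // Preserve both axis attributes of ReverseSequence on the clone.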
+ cloned->seq_axis(node->seq_axis());
+ cloned->batch_axis(node->batch_axis());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReverseSequence)
+{
+ auto g = loco::make_graph();
+ auto node_rs = g->nodes()->create<luci::CircleReverseSequence>();
+ node_rs->seq_axis(1);
+ node_rs->batch_axis(2);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rs, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rs = dynamic_cast<luci::CircleReverseSequence *>(cloned);
+ ASSERT_NE(nullptr, cloned_rs);
+ ASSERT_EQ(node_rs->seq_axis(), cloned_rs->seq_axis());
+ ASSERT_EQ(node_rs->batch_axis(), cloned_rs->batch_axis());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleReverseV2 *)
+{
+ return _graph->nodes()->create<luci::CircleReverseV2>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReverseV2)
+{
+ auto g = loco::make_graph();
+ auto node_rev = g->nodes()->create<luci::CircleReverseV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rev, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rev = dynamic_cast<luci::CircleReverseV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_rev);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleRound *)
+{
+ return _graph->nodes()->create<luci::CircleRound>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Round)
+{
+ auto g = loco::make_graph();
+ auto node_rnd = g->nodes()->create<luci::CircleRound>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rnd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rnd = dynamic_cast<luci::CircleRound *>(cloned);
+ ASSERT_NE(nullptr, cloned_rnd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleRsqrt *)
+{
+ return _graph->nodes()->create<luci::CircleRsqrt>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Rsqrt)
+{
+ auto g = loco::make_graph();
+ auto node_rsqrt = g->nodes()->create<luci::CircleRsqrt>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rsqrt, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rsqrt = dynamic_cast<luci::CircleRsqrt *>(cloned);
+ ASSERT_NE(nullptr, cloned_rsqrt);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleScatterNd *)
+{
+ return _graph->nodes()->create<luci::CircleScatterNd>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ScatterNd)
+{
+ auto g = loco::make_graph();
+ auto node_snd = g->nodes()->create<luci::CircleScatterNd>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_snd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_snd = dynamic_cast<luci::CircleScatterNd *>(cloned);
+ ASSERT_NE(nullptr, cloned_snd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSegmentSum *)
+{
+ return _graph->nodes()->create<luci::CircleSegmentSum>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SegmentSum)
+{
+ auto g = loco::make_graph();
+ auto node_ss = g->nodes()->create<luci::CircleSegmentSum>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ss, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ss = dynamic_cast<luci::CircleSegmentSum *>(cloned);
+ ASSERT_NE(nullptr, cloned_ss);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSelect *)
+{
+ return _graph->nodes()->create<luci::CircleSelect>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Select)
+{
+ auto g = loco::make_graph();
+ auto node_sel = g->nodes()->create<luci::CircleSelect>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sel, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sel = dynamic_cast<luci::CircleSelect *>(cloned);
+ ASSERT_NE(nullptr, cloned_sel);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSelectV2 *)
+{
+ return _graph->nodes()->create<luci::CircleSelectV2>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SelectV2)
+{
+ auto g = loco::make_graph();
+ auto node_sel = g->nodes()->create<luci::CircleSelectV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sel, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sel = dynamic_cast<luci::CircleSelectV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_sel);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleShape *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleShape>();
+ if (cloned != nullptr)
+ cloned->out_type(node->out_type());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Shape)
+{
+ auto g = loco::make_graph();
+ auto node_shape = g->nodes()->create<luci::CircleShape>();
+ node_shape->out_type(loco::DataType::S32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_shape, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_shape = dynamic_cast<luci::CircleShape *>(cloned);
+ ASSERT_NE(nullptr, cloned_shape);
+ ASSERT_EQ(node_shape->out_type(), cloned_shape->out_type());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSin *)
+{
+ return _graph->nodes()->create<luci::CircleSin>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sin)
+{
+ auto g = loco::make_graph();
+ auto node_sin = g->nodes()->create<luci::CircleSin>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sin, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sin = dynamic_cast<luci::CircleSin *>(cloned);
+ ASSERT_NE(nullptr, cloned_sin);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSlice *)
+{
+ return _graph->nodes()->create<luci::CircleSlice>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Slice)
+{
+ auto g = loco::make_graph();
+ auto node_slice = g->nodes()->create<luci::CircleSlice>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_slice, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_slice = dynamic_cast<luci::CircleSlice *>(cloned);
+ ASSERT_NE(nullptr, cloned_slice);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSoftmax *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSoftmax>();
+ if (cloned != nullptr)
+ cloned->beta(node->beta());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Softmax)
+{
+ auto g = loco::make_graph();
+ auto node_sm = g->nodes()->create<luci::CircleSoftmax>();
+ node_sm->beta(2.3f);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sm, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sm = dynamic_cast<luci::CircleSoftmax *>(cloned);
+ ASSERT_NE(nullptr, cloned_sm);
+ ASSERT_EQ(node_sm->beta(), cloned_sm->beta());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSpaceToBatchND *)
+{
+ return _graph->nodes()->create<luci::CircleSpaceToBatchND>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SpaceToBatchND)
+{
+ auto g = loco::make_graph();
+ auto node_s2bnd = g->nodes()->create<luci::CircleSpaceToBatchND>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_s2bnd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_s2bnd = dynamic_cast<luci::CircleSpaceToBatchND *>(cloned);
+ ASSERT_NE(nullptr, cloned_s2bnd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSpaceToDepth *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSpaceToDepth>();
+ if (cloned != nullptr)
+ cloned->block_size(node->block_size());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SpaceToDepth)
+{
+ auto g = loco::make_graph();
+ auto node_s2d = g->nodes()->create<luci::CircleSpaceToDepth>();
+ node_s2d->block_size(32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_s2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_s2d = dynamic_cast<luci::CircleSpaceToDepth *>(cloned);
+ ASSERT_NE(nullptr, cloned_s2d);
+ ASSERT_EQ(node_s2d->block_size(), cloned_s2d->block_size());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSparseToDense *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSparseToDense>();
+ if (cloned != nullptr)
+ cloned->validate_indices(node->validate_indices());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SparseToDense)
+{
+ auto g = loco::make_graph();
+ auto node_s2d = g->nodes()->create<luci::CircleSparseToDense>();
+ node_s2d->validate_indices(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_s2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_s2d = dynamic_cast<luci::CircleSparseToDense *>(cloned);
+ ASSERT_NE(nullptr, cloned_s2d);
+ ASSERT_EQ(node_s2d->validate_indices(), cloned_s2d->validate_indices());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSplit *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplit>();
+ if (cloned != nullptr)
+ cloned->num_split(node->num_split());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Split)
+{
+ auto g = loco::make_graph();
+ auto node_split = g->nodes()->create<luci::CircleSplit>();
+ node_split->num_split(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_split, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_split = dynamic_cast<luci::CircleSplit *>(cloned);
+ ASSERT_NE(nullptr, cloned_split);
+ ASSERT_EQ(node_split->num_split(), cloned_split->num_split());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSplitOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplitOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SplitOut)
+{
+ auto g = loco::make_graph();
+ auto node_sout = g->nodes()->create<luci::CircleSplitOut>();
+ node_sout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sout = dynamic_cast<luci::CircleSplitOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_sout);
+ ASSERT_EQ(node_sout->index(), cloned_sout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSplitV *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplitV>();
+ if (cloned != nullptr)
+ cloned->num_split(node->num_split());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SplitV)
+{
+ auto g = loco::make_graph();
+ auto node_split = g->nodes()->create<luci::CircleSplitV>();
+ node_split->num_split(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_split, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_split = dynamic_cast<luci::CircleSplitV *>(cloned);
+ ASSERT_NE(nullptr, cloned_split);
+ ASSERT_EQ(node_split->num_split(), cloned_split->num_split());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSplitVOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplitVOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SplitVOut)
+{
+ auto g = loco::make_graph();
+ auto node_sout = g->nodes()->create<luci::CircleSplitVOut>();
+ node_sout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sout = dynamic_cast<luci::CircleSplitVOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_sout);
+ ASSERT_EQ(node_sout->index(), cloned_sout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSqrt *)
+{
+ return _graph->nodes()->create<luci::CircleSqrt>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sqrt)
+{
+ auto g = loco::make_graph();
+ auto node_sqrt = g->nodes()->create<luci::CircleSqrt>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sqrt, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sqrt = dynamic_cast<luci::CircleSqrt *>(cloned);
+ ASSERT_NE(nullptr, cloned_sqrt);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSquare *)
+{
+ return _graph->nodes()->create<luci::CircleSquare>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Square)
+{
+ auto g = loco::make_graph();
+ auto node_squ = g->nodes()->create<luci::CircleSquare>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_squ, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_squ = dynamic_cast<luci::CircleSquare *>(cloned);
+ ASSERT_NE(nullptr, cloned_squ);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSquaredDifference *)
+{
+ return _graph->nodes()->create<luci::CircleSquaredDifference>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SquaredDifference)
+{
+ auto g = loco::make_graph();
+ auto node_sd = g->nodes()->create<luci::CircleSquaredDifference>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sd = dynamic_cast<luci::CircleSquaredDifference *>(cloned);
+ ASSERT_NE(nullptr, cloned_sd);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSqueeze *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSqueeze>();
+ if (cloned != nullptr)
+ cloned->squeeze_dims(node->squeeze_dims());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, squeeze_simple)
+{
+ luci::CircleInput input;
+ luci::CircleSqueeze squeeze;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ squeeze.input(&input);
+ squeeze.squeeze_dims({0});
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
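+ // Only axis 0 (size 1) is squeezed, so {1, 4, 3, 1} becomes {4, 3, 1}.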
+ ASSERT_TRUE(shape_inf_rule.infer(&squeeze, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(4, shape.dim(0).value());
+ ASSERT_EQ(3, shape.dim(1).value());
+ ASSERT_EQ(1, shape.dim(2).value());
+}
+
+TEST(ShapeRuleTest, squeeze_all)
+{
+ luci::CircleInput input;
+ luci::CircleSqueeze squeeze;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ squeeze.input(&input);
+ squeeze.squeeze_dims({});
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
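+ // An empty squeeze_dims removes every dimension of size 1, so {1, 4, 3, 1} becomes {4, 3}.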
+ ASSERT_TRUE(shape_inf_rule.infer(&squeeze, shape));
+ ASSERT_EQ(2, shape.rank());
+ ASSERT_EQ(4, shape.dim(0).value());
+ ASSERT_EQ(3, shape.dim(1).value());
+}
+
+TEST(CloneNodeTest, clone_Squeeze)
+{
+ auto g = loco::make_graph();
+ auto node_squ = g->nodes()->create<luci::CircleSqueeze>();
+ node_squ->squeeze_dims({2, 3});
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_squ, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_squ = dynamic_cast<luci::CircleSqueeze *>(cloned);
+ ASSERT_NE(nullptr, cloned_squ);
+ ASSERT_EQ(node_squ->squeeze_dims().size(), cloned_squ->squeeze_dims().size());
+ for (size_t s = 0; s < node_squ->squeeze_dims().size(); ++s)
+ ASSERT_EQ(node_squ->squeeze_dims().at(s), cloned_squ->squeeze_dims().at(s));
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleStridedSlice *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleStridedSlice>();
+ if (cloned != nullptr)
+ {
+ cloned->begin_mask(node->begin_mask());
+ cloned->end_mask(node->end_mask());
+ cloned->ellipsis_mask(node->ellipsis_mask());
+ cloned->new_axis_mask(node->new_axis_mask());
+ cloned->shrink_axis_mask(node->shrink_axis_mask());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_StridedSlice)
+{
+ auto g = loco::make_graph();
+ auto node_ss = g->nodes()->create<luci::CircleStridedSlice>();
+ node_ss->begin_mask(1);
+ node_ss->end_mask(2);
+ node_ss->ellipsis_mask(3);
+ node_ss->new_axis_mask(4);
+ node_ss->shrink_axis_mask(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ss, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ss = dynamic_cast<luci::CircleStridedSlice *>(cloned);
+ ASSERT_NE(nullptr, cloned_ss);
+ ASSERT_EQ(node_ss->begin_mask(), cloned_ss->begin_mask());
+ ASSERT_EQ(node_ss->end_mask(), cloned_ss->end_mask());
+ ASSERT_EQ(node_ss->ellipsis_mask(), cloned_ss->ellipsis_mask());
+ ASSERT_EQ(node_ss->new_axis_mask(), cloned_ss->new_axis_mask());
+ ASSERT_EQ(node_ss->shrink_axis_mask(), cloned_ss->shrink_axis_mask());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSub *node)
+{
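+ // A Sub with an UNDEFINED fused activation function is not cloned; the clone_Sub_NEG test covers this case.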
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleSub>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sub)
+{
+ auto g = loco::make_graph();
+ auto node_sub = g->nodes()->create<luci::CircleSub>();
+ node_sub->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sub, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sub = dynamic_cast<luci::CircleSub *>(cloned);
+ ASSERT_NE(nullptr, cloned_sub);
+ ASSERT_EQ(node_sub->fusedActivationFunction(), cloned_sub->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Sub_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_sub = g->nodes()->create<luci::CircleSub>();
+ node_sub->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sub, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* limitations under the License.
 */

-#include <luci/Service/CircleShapeSignatureInference.h>
+#include "CircleCloneNode.h"

 namespace luci
 {

-ShapeSignature ssinf::Algorithm::visit(const luci::CircleSum *node)
+luci::CircleNode *CloneNode::visit(const luci::CircleSum *node)
 {
- return legalized_signature(
- reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+ auto *cloned = _graph->nodes()->create<luci::CircleSum>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
 }

 } // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sum)
+{
+ auto g = loco::make_graph();
+ auto node_sum = g->nodes()->create<luci::CircleSum>();
+ node_sum->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sum, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sum = dynamic_cast<luci::CircleSum *>(cloned);
+ ASSERT_NE(nullptr, cloned_sum);
+ ASSERT_EQ(node_sum->keep_dims(), cloned_sum->keep_dims());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTanh *)
+{
+ return _graph->nodes()->create<luci::CircleTanh>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Tanh)
+{
+ auto g = loco::make_graph();
+ auto node_tanh = g->nodes()->create<luci::CircleTanh>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tanh, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tanh = dynamic_cast<luci::CircleTanh *>(cloned);
+ ASSERT_NE(nullptr, cloned_tanh);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTile *)
+{
+ return _graph->nodes()->create<luci::CircleTile>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Tile)
+{
+ auto g = loco::make_graph();
+ auto node_tile = g->nodes()->create<luci::CircleTile>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tile, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tile = dynamic_cast<luci::CircleTile *>(cloned);
+ ASSERT_NE(nullptr, cloned_tile);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTopKV2 *)
+{
+ return _graph->nodes()->create<luci::CircleTopKV2>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_TopKV2)
+{
+ auto g = loco::make_graph();
+ auto node_top = g->nodes()->create<luci::CircleTopKV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_top, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_top = dynamic_cast<luci::CircleTopKV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_top);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTopKV2Out *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleTopKV2Out>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_TopKV2Out)
+{
+ auto g = loco::make_graph();
+ auto node_tout = g->nodes()->create<luci::CircleTopKV2Out>();
+ node_tout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tout = dynamic_cast<luci::CircleTopKV2Out *>(cloned);
+ ASSERT_NE(nullptr, cloned_tout);
+ ASSERT_EQ(node_tout->index(), cloned_tout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTranspose *)
+{
+ return _graph->nodes()->create<luci::CircleTranspose>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, transpose_simple)
+{
+ luci::CircleInput input;
+ luci::CircleConst perm;
+ luci::CircleTranspose transpose;
+
+ input.shape({3, 8, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ perm.dtype(loco::DataType::S32);
+ perm.rank(1);
+ perm.dim(0).set(3);
+ perm.size<loco::DataType::S32>(3);
+ perm.at<loco::DataType::S32>(0) = 1;
+ perm.at<loco::DataType::S32>(1) = 2;
+ perm.at<loco::DataType::S32>(2) = 0;
+ perm.shape_status(luci::ShapeStatus::VALID);
+
+ transpose.a(&input);
+ transpose.perm(&perm);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&transpose, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(8, shape.dim(0).value());
+ ASSERT_EQ(1, shape.dim(1).value());
+ ASSERT_EQ(3, shape.dim(2).value());
+}
+
+TEST(CloneNodeTest, clone_Transpose)
+{
+ auto g = loco::make_graph();
+ auto node_tr = g->nodes()->create<luci::CircleTranspose>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tr, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tr = dynamic_cast<luci::CircleTranspose *>(cloned);
+ ASSERT_NE(nullptr, cloned_tr);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTransposeConv *node)
+{
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleTransposeConv>();
+ if (cloned != nullptr)
+ {
+ cloned->padding(node->padding());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_TransposeConv)
+{
+ auto g = loco::make_graph();
+ auto node_trconv = g->nodes()->create<luci::CircleTransposeConv>();
+ node_trconv->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_trconv, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_trconv = dynamic_cast<luci::CircleTransposeConv *>(cloned);
+ ASSERT_NE(nullptr, cloned_trconv);
+ ASSERT_EQ(node_trconv->padding(), cloned_trconv->padding());
+}
+
+TEST(CloneNodeTest, clone_TransposeConv_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_trconv = g->nodes()->create<luci::CircleTransposeConv>();
+ node_trconv->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_trconv, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUnidirectionalSequenceLSTM *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->cell_clip(node->cell_clip());
+ cloned->proj_clip(node->proj_clip());
+ cloned->time_major(node->time_major());
+ cloned->asymmetric_quantize_inputs(node->asymmetric_quantize_inputs());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_UnidirectionalSequenceLSTM)
+{
+ auto g = loco::make_graph();
+ auto node_uslstm = g->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+ node_uslstm->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_uslstm->cell_clip(1.1f);
+ node_uslstm->proj_clip(2.2f);
+ node_uslstm->time_major(true);
+ node_uslstm->asymmetric_quantize_inputs(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uslstm, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uslstm = dynamic_cast<luci::CircleUnidirectionalSequenceLSTM *>(cloned);
+ ASSERT_NE(nullptr, cloned_uslstm);
+ ASSERT_EQ(node_uslstm->fusedActivationFunction(), cloned_uslstm->fusedActivationFunction());
+ ASSERT_EQ(node_uslstm->cell_clip(), cloned_uslstm->cell_clip());
+ ASSERT_EQ(node_uslstm->proj_clip(), cloned_uslstm->proj_clip());
+ ASSERT_EQ(node_uslstm->time_major(), cloned_uslstm->time_major());
+ ASSERT_EQ(node_uslstm->asymmetric_quantize_inputs(), cloned_uslstm->asymmetric_quantize_inputs());
+}
+
+TEST(CloneNodeTest, clone_UnidirectionalSequenceLSTM_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_uslstm = g->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+ node_uslstm->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uslstm, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUnique *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUnique>();
+ if (cloned != nullptr)
+ cloned->idx_out_type(node->idx_out_type());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Unique)
+{
+ auto g = loco::make_graph();
+ auto node_uniq = g->nodes()->create<luci::CircleUnique>();
+ node_uniq->idx_out_type(loco::DataType::S32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uniq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uniq = dynamic_cast<luci::CircleUnique *>(cloned);
+ ASSERT_NE(nullptr, cloned_uniq);
+ ASSERT_EQ(node_uniq->idx_out_type(), cloned_uniq->idx_out_type());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUniqueOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUniqueOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_UniqueOut)
+{
+ auto g = loco::make_graph();
+ auto node_uout = g->nodes()->create<luci::CircleUniqueOut>();
+ node_uout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uout = dynamic_cast<luci::CircleUniqueOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_uout);
+ ASSERT_EQ(node_uout->index(), cloned_uout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUnpack *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUnpack>();
+ if (cloned != nullptr)
+ {
+ cloned->num(node->num());
+ cloned->axis(node->axis());
+ }
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Unpack)
+{
+ auto g = loco::make_graph();
+ auto node_unp = g->nodes()->create<luci::CircleUnpack>();
+ node_unp->num(1);
+ node_unp->axis(2);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_unp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_unp = dynamic_cast<luci::CircleUnpack *>(cloned);
+ ASSERT_NE(nullptr, cloned_unp);
+ ASSERT_EQ(node_unp->num(), cloned_unp->num());
+ ASSERT_EQ(node_unp->axis(), cloned_unp->axis());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUnpackOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUnpackOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_UnpackOut)
+{
+ auto g = loco::make_graph();
+ auto node_uout = g->nodes()->create<luci::CircleUnpackOut>();
+ node_uout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uout = dynamic_cast<luci::CircleUnpackOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_uout);
+ ASSERT_EQ(node_uout->index(), cloned_uout->index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleWhere *)
+{
+ return _graph->nodes()->create<luci::CircleWhere>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Where)
+{
+ auto g = loco::make_graph();
+ auto node_wh = g->nodes()->create<luci::CircleWhere>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_wh, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_wh = dynamic_cast<luci::CircleWhere *>(cloned);
+ ASSERT_NE(nullptr, cloned_wh);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleZerosLike *)
+{
+ return _graph->nodes()->create<luci::CircleZerosLike>();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ZerosLike)
+{
+ auto g = loco::make_graph();
+ auto node_zl = g->nodes()->create<luci::CircleZerosLike>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_zl, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_zl = dynamic_cast<luci::CircleZerosLike *>(cloned);
+ ASSERT_NE(nullptr, cloned_zl);
+}
res._dims.resize(circle_node->rank());
for (uint32_t i = 0; i < circle_node->rank(); ++i)
- res._dims.at(i) = circle_node->dim(i).value();
+ res._dims.at(i) = circle_node->dim(i).known() ? circle_node->dim(i).value() : -1;
return res;
}
return res;
}
-ShapeDescription to_shape_description(const loco::FeatureShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- // T/F Lite encodes a feature map as a NHWC tensor
- res._dims.resize(4);
- res._dims.at(0) = shape.count().value();
- res._dims.at(1) = shape.height().value();
- res._dims.at(2) = shape.width().value();
- res._dims.at(3) = shape.depth().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::FilterShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- // T/F Lite encodes a convolution filter as a NHWC tensor
- res._dims.resize(4);
- res._dims.at(0) = shape.count().value();
- res._dims.at(1) = shape.height().value();
- res._dims.at(2) = shape.width().value();
- res._dims.at(3) = shape.depth().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::DepthwiseFilterShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- // T/F Lite encodes a depthwise convolution filter as a [1, H, W, C*M] tensor
- res._dims.resize(4);
- res._dims.at(0) = 1;
- res._dims.at(1) = shape.height().value();
- res._dims.at(2) = shape.width().value();
- res._dims.at(3) = shape.depth().value() * shape.multiplier().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::BiasShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- res._dims.resize(1);
- res._dims.at(0) = shape.length().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::MatrixShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- res._dims.resize(2);
- res._dims.at(0) = shape.height().value();
- res._dims.at(1) = shape.width().value();
-
- return res;
-}
-
ShapeDescription to_shape_description(const loco::NodeShape &shape)
{
switch (shape.domain())
{
case loco::Domain::Tensor:
return to_shape_description(shape.as<loco::TensorShape>());
- case loco::Domain::Feature:
- return to_shape_description(shape.as<loco::FeatureShape>());
- case loco::Domain::Filter:
- return to_shape_description(shape.as<loco::FilterShape>());
- case loco::Domain::DepthwiseFilter:
- return to_shape_description(shape.as<loco::DepthwiseFilterShape>());
- case loco::Domain::Bias:
- return to_shape_description(shape.as<loco::BiasShape>());
- case loco::Domain::Matrix:
- return to_shape_description(shape.as<loco::MatrixShape>());
default:
break;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/ShapeDescription.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/Nodes/CircleConst.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeDescriptionTest, CircleNode)
+{
+ // Use CircleConst as CircleNode
+ luci::CircleConst circle_const;
+ circle_const.shape({1, 2, 3, 4});
+
+ auto sd = luci::to_shape_description(&circle_const);
+
+ ASSERT_EQ(4, sd._dims.size());
+ ASSERT_EQ(1, sd._dims.at(0));
+ ASSERT_TRUE(sd._rank_known);
+}
+
+TEST(ShapeDescriptionTest, TensorShape)
+{
+ loco::TensorShape tensor_shape{1, 2, 3, 4};
+ loco::NodeShape node_shape(tensor_shape);
+
+ auto sd = luci::to_shape_description(node_shape);
+
+ ASSERT_EQ(4, sd._dims.size());
+ ASSERT_EQ(1, sd._dims.at(0));
+ ASSERT_TRUE(sd._rank_known);
+}
+
+TEST(ShapeDescriptionTest, BiasShape_NEG)
+{
+ loco::BiasShape bias_shape;
+ bias_shape.length() = 1;
+ loco::NodeShape node_shape(bias_shape);
+
+ EXPECT_THROW(luci::to_shape_description(node_shape), std::exception);
+}
#include "ShapeInfer_StridedSlice.h"
#include "Check.h"
+#include "CircleShapeInferenceHelper.h"
#include <luci/IR/CircleNode.h>
#include <loco/IR/DataType.h>
#include <loco/IR/NodeShape.h>
#include <oops/InternalExn.h>
-#include <loco/Service/ShapeInference.h>
#include <cmath>
#include <cstdint>
assert(node->new_axis_mask() == 0);
auto op_params = BuildStridedSliceParams(node);
- loco::TensorShape input_shape = loco::shape_get(input_node).as<loco::TensorShape>();
+ loco::TensorShape input_shape = luci::shape_get(input_node).as<loco::TensorShape>();
uint32_t num_input_axes = input_shape.rank();
assert(begin_node->size<S32>() <= num_input_axes);
#include <luci/Log.h>
#include <loco/IR/NodeShape.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/TypeInference.h>
#include <cassert>
+#include <unordered_map>
#include <vector>
namespace
{
if (r)
os << ",";
- os << tensor_shape.dim(r).value();
+
+ if (tensor_shape.dim(r).known())
+ os << tensor_shape.dim(r).value();
+ else
+ os << "?";
}
os << "]";
return os;
{
if (r)
os << ",";
- os << circle_node->dim(r).value();
+
+ if (circle_node->dim(r).known())
+ os << circle_node->dim(r).value();
+ else
+ os << "?";
}
os << "]";
return os;
auto go_tensor_shape = graph_out->shape();
assert(go_tensor_shape);
+ // NOTE Even if the shape of a graph output is [] (which means "shape inference was impossible")
+ //      while the shape of the CircleNode is not, it can still be a valid case because the
+ //      shape inference algorithm for CircleNode may have been upgraded since then. The opposite
+ //      is also possible. If such cases appear, the following validation code should be fixed.
bool is_shape_valid = (circle_node->rank() == go_tensor_shape->rank());
for (uint32_t i = 0; is_shape_valid && i < circle_node->rank(); ++i)
- if (circle_node->dim(i).value() != go_tensor_shape->dim(i).value())
+ {
+ if (!circle_node->dim(i).known() || !go_tensor_shape->dim(i).known())
+ {
+ // If at least one of two dimensions is unknown,
+ // the unknown dimension can accept any value.
+ INFO(l) << "Unknown dimension is matched with known dimension" << std::endl;
+ }
+ else if (circle_node->dim(i).value() != go_tensor_shape->dim(i).value())
+ {
is_shape_valid = false;
+ }
+ }
if (is_shape_valid == false)
{
return true;
}
-bool validate_shape_signature(loco::Graph *g)
-{
- LOGGER(l);
-
- for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
- {
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- const auto shape_signature = circle_node->shape_signature();
+} // namespace
- if (shape_signature.rank() == 0)
- continue;
+namespace luci
+{
- // Rank of shape and shape signature should be same
- if (circle_node->rank() != shape_signature.rank())
- {
- INFO(l) << "[luci] Rank of shape signature for " << circle_node->name() << " do not match"
- << std::endl;
- return false;
- }
+bool validate(loco::Graph *g)
+{
+ if (!loco::valid(g))
+ return false;
- bool has_unknown = false;
+ if (!validate_shape_dtype(g))
+ return false;
- // If shape siganture is not -1, dimension value should be same
- for (uint32_t d = 0; d < shape_signature.rank(); ++d)
- {
- if (shape_signature.dim(d) != -1 &&
- shape_signature.dim(d) != (int32_t)(circle_node->dim(d).value()))
- {
- INFO(l) << "[luci] Dimension " << d << "of shape signature for " << circle_node->name()
- << " do not match" << std::endl;
- return false;
- }
+ // TODO add more validation
- if (shape_signature.dim(d) == -1)
- has_unknown = true;
- }
+ return true;
+}
- // Shape signature should have at least one -1 value.
- if (!has_unknown)
- {
- INFO(l) << "[luci] Shape signature in " << circle_node->name()
- << " do not have unknown dimension" << std::endl;
+bool validate_name(loco::Graph *g)
+{
+ auto nodes = g->nodes();
+ for (uint32_t n = 0; n < nodes->size(); ++n)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(nodes->at(n));
+ auto name = node->name();
+ if (name.empty())
return false;
- }
}
return true;
}
-} // namespace
-
-namespace luci
+bool validate_unique_name(luci::Module *m)
{
+ std::unordered_map<std::string, bool> names_col;
-bool validate(loco::Graph *g)
-{
- if (!loco::valid(g))
- return false;
-
- if (!validate_shape_dtype(g))
- return false;
-
- if (!validate_shape_signature(g))
- return false;
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ auto graph = m->graph(g);
+ auto nodes = graph->nodes();
+ for (uint32_t n = 0; n < nodes->size(); ++n)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(nodes->at(n));
+ // skip CircleOutput as it may have the same name as its from() node
+ auto output = dynamic_cast<luci::CircleOutput *>(node);
+ if (output != nullptr)
+ continue;
+
+ auto name = node->name();
+ auto it = names_col.find(name);
+ if (it != names_col.end())
+ return false;
- // TODO add more validation
+ names_col[name] = true;
+ }
+ }
return true;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/Validate.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleAdd.h>
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+
+ // give the output the same name as _sqrt: CircleOutput may share a name with its from() node
+ output()->name(_sqrt->name());
+ }
+};
+
+class Sqrt2xGraphlet
+{
+public:
+ Sqrt2xGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt1 = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt1->dtype(loco::DataType::S32);
+ _sqrt1->name("sqrt");
+
+ _sqrt2 = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt2->dtype(loco::DataType::S32);
+ _sqrt2->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt1 = nullptr;
+ luci::CircleSqrt *_sqrt2 = nullptr;
+};
+
+class Sqrt2xGraph : public TestIOGraph, public Sqrt2xGraphlet
+{
+public:
+ Sqrt2xGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ Sqrt2xGraphlet::init(g(), shape);
+
+ _sqrt1->x(input());
+
+ _sqrt2->x(_sqrt1);
+
+ output()->from(_sqrt2);
+ }
+};
+
+} // namespace
+
+TEST(ValidateTest, non_empty_name)
+{
+ SqrtGraph g;
+ g.init({3, 3});
+
+ ASSERT_TRUE(luci::validate_name(g.g()));
+}
+
+TEST(ValidateTest, unique_name)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ ASSERT_TRUE(luci::validate_unique_name(&module));
+}
+
+TEST(ValidateTest, unique_name_NEG)
+{
+ luci::Module module;
+
+ Sqrt2xGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ ASSERT_FALSE(luci::validate_unique_name(&module));
+}
set(SRCS_READ_TESTER
src/ReadTester.cpp
+ src/ReadModule.cpp
)
add_executable(luci_readtester "${SRCS_READ_TESTER}")
set(SRCS_WRITE_TESTER
src/WriteTester.cpp
+ src/ReadModule.cpp
)
add_executable(luci_writetester "${SRCS_WRITE_TESTER}")
target_link_libraries(luci_writetester PRIVATE foder)
target_link_libraries(luci_writetester PRIVATE oops)
target_link_libraries(luci_writetester PRIVATE safemain)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_readtester_test src/ReadTester.test.cpp ${SRCS_READ_TESTER})
+target_link_libraries(luci_readtester_test luci_import)
+target_link_libraries(luci_readtester_test luci_service)
+target_link_libraries(luci_readtester_test luci_pass)
+target_link_libraries(luci_readtester_test foder)
+
+GTest_AddTest(luci_writetester_test src/WriteTester.test.cpp ${SRCS_WRITE_TESTER})
+target_link_libraries(luci_writetester_test luci_import)
+target_link_libraries(luci_writetester_test luci_service)
+target_link_libraries(luci_writetester_test luci_pass)
+target_link_libraries(luci_writetester_test luci_export)
+target_link_libraries(luci_writetester_test foder)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReadModule.h"
+
+#include <luci/Pass/CircleShapeInferencePass.h>
+#include <luci/Pass/CircleTypeInferencePass.h>
+#include <luci/Service/Validate.h>
+
+#include <logo/Phase.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+std::unique_ptr<luci::Module> ReadModule(std::string &input_path)
+{
+ // Load model from the file
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data = file_loader.load();
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ luci::Importer importer;
+ auto module = importer.importModule(circle_model);
+ assert(module->size() > 0);
+
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+ if (graph == nullptr)
+ return nullptr;
+
+ {
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{graph};
+ phase_runner.run(phase);
+ }
+
+ if (!luci::validate(graph))
+ return nullptr;
+ }
+ return module;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TESTER_READ_MODULE_H__
+#define __LUCI_TESTER_READ_MODULE_H__
+
+#include <luci/Importer.h>
+#include <foder/FileLoader.h>
+
+#include <memory>
+#include <string>
+
+std::unique_ptr<luci::Module> ReadModule(std::string &input_path);
+
+#endif // __LUCI_TESTER_READ_MODULE_H__
* limitations under the License.
*/
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
-#include <luci/Service/Validate.h>
-#include <luci/Pass/ShapeInferencePass.h>
-#include <luci/Pass/TypeInferencePass.h>
-
-// Following passes will be removed after refactoring is finished
-#include <luci/Pass/MigrateLegacyShapeDtypePass.h>
+#include "ReadModule.h"
#include <iostream>
-#include <map>
#include <string>
namespace
std::cout << "[INFO] Circle is '" << input_path << "'" << std::endl;
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ auto module = ReadModule(input_path);
+ if (module == nullptr)
return EXIT_FAILURE;
- }
-
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
- assert(module->size() > 0);
- for (size_t g = 0; g < module->size(); ++g)
- {
- auto graph = module->graph(g);
- if (graph == nullptr)
- return 255;
-
- {
- luci::ShapeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
- {
- luci::TypeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
- {
- // This pass will be removed after refactoring is finished
- luci::MigrateLegacyShapeDtypePass pass;
- while (pass.run(graph) == true)
- ;
- }
-
- if (!luci::validate(graph))
- return 255;
- }
return 0;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstring>
+
+#include <gtest/gtest.h>
+
+// From ReadTester.cpp
+int entry(int argc, char **argv);
+
+TEST(ReadTesterTest, invalid_argc_NEG)
+{
+ char argv_1[20];
+ strcpy(argv_1, "ReadTesterTest");
+
+ int argc = 1;
+ char *argv[] = {argv_1};
+
+ ASSERT_NE(0, entry(argc, argv));
+}
+
+TEST(ReadTesterTest, invalid_file_NEG)
+{
+ char argv_1[20], argv_2[20];
+ strcpy(argv_1, "ReadTesterTest");
+ strcpy(argv_2, "not_a_file");
+
+ int argc = 2;
+ char *argv[] = {argv_1, argv_2};
+
+ EXPECT_THROW(entry(argc, argv), std::runtime_error);
+}
* limitations under the License.
*/
-#include <foder/FileLoader.h>
+#include "ReadModule.h"
-#include <luci/Importer.h>
-#include <luci/Pass/ShapeInferencePass.h>
-#include <luci/Pass/TypeInferencePass.h>
-#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <oops/InternalExn.h>
-// Following passes will be removed after refactoring is finished
-#include <luci/Pass/MigrateLegacyShapeDtypePass.h>
-
#include <fstream>
#include <iostream>
-#include <map>
#include <string>
namespace
{
public:
CircleExpContract(loco::Graph *graph, const std::string &filename)
- : _graph(graph), _filepath(filename)
+ : _graph(graph), _filepath(filename)
{
// NOTHING TO DO
}
CircleExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
+ : _module(module), _filepath(filename)
{
// NOTHING TO DO
}
std::cout << "[INFO] Circle from '" << input_path << "' to '" << output_path << "'" << std::endl;
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ auto module = ReadModule(input_path);
+ if (module == nullptr)
return EXIT_FAILURE;
- }
-
- // Import from input Circle file
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
- assert(module->size() > 0);
-
- for (size_t g = 0; g < module->size(); ++g)
- {
- auto graph = module->graph(g);
- if (graph == nullptr)
- return 255;
-
- {
- luci::ShapeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
- {
- luci::TypeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
- {
- // This pass will be removed after refactoring is finished
- luci::MigrateLegacyShapeDtypePass pass;
- while (pass.run(graph) == true)
- ;
- }
-
- if (!luci::validate(graph))
- return 255;
- }
// Export to output Circle file
luci::CircleExporter exporter;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstring>
+
+#include <gtest/gtest.h>
+
+// From WriteTester.cpp
+int entry(int argc, char **argv);
+
+TEST(WriteTesterTest, invalid_argc_NEG)
+{
+ char argv_1[20];
+ strcpy(argv_1, "WriteTesterTest");
+
+ int argc = 1;
+ char *argv[] = {argv_1};
+
+ ASSERT_NE(0, entry(argc, argv));
+}
+
+TEST(WriteTesterTest, invalid_file_NEG)
+{
+ char argv_1[20], argv_2[20], argv_3[20];
+ strcpy(argv_1, "WriteTesterTest");
+ strcpy(argv_2, "not_a_file");
+ strcpy(argv_3, "not_a_file");
+
+ int argc = 3;
+ char *argv[] = {argv_1, argv_2, argv_3};
+
+ EXPECT_THROW(entry(argc, argv), std::runtime_error);
+}
--- /dev/null
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+# NOTE we use the "*.test.cpp" suffix so that these sources are NOT picked up by static analyzer tools
+
+# testhelper library itself
+set(HELPER_SOURCE
+ src/TestShape.test.cpp
+ )
+
+add_library(luci_testhelper STATIC ${HELPER_SOURCE})
+target_include_directories(luci_testhelper PRIVATE src)
+target_include_directories(luci_testhelper PUBLIC include)
+target_link_libraries(luci_testhelper luci_lang)
+
+# test for testhelper library
+set(TESTER_SOURCE
+ src/TestIOGraph.test.cpp
+ )
+
+GTest_AddTest(luci_testhelper_test ${TESTER_SOURCE})
+target_link_libraries(luci_testhelper_test luci_testhelper)
--- /dev/null
+# luci-testhelper
+
+_luci-testhelper_ provides helper classes for unit testing.
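+
+Below is a minimal usage sketch showing how these helpers might be combined in a
+GTest case. The `SqrtTestGraph` class and the test name are illustrative only;
+`TestIOGraph`, `ShapeU32`, and `luci::CircleSqrt` are the types used elsewhere in
+this change.
+
+```cpp
+#include "luci/test/TestIOGraph.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// Illustrative graph: CircleInput -> CircleSqrt -> CircleOutput
+class SqrtTestGraph : public luci::test::TestIOGraph
+{
+public:
+  void init(const luci::test::ShapeU32 shape)
+  {
+    // create graph-level input/output nodes with the given shape
+    TestIOGraph::init(shape, shape);
+
+    _sqrt = g()->nodes()->create<luci::CircleSqrt>();
+    _sqrt->name("sqrt");
+    _sqrt->x(input());      // wire input -> sqrt
+    output()->from(_sqrt);  // wire sqrt -> output
+  }
+
+protected:
+  luci::CircleSqrt *_sqrt = nullptr;
+};
+
+} // namespace
+
+TEST(LuciTestHelperExample, build_small_graph)
+{
+  SqrtTestGraph g;
+  g.init({2, 3});
+
+  ASSERT_NE(nullptr, g.g());
+  ASSERT_EQ(1, g.num_inputs());
+}
+```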
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TESTHELPER_TEST_IO_GRAPH_H__
+#define __LUCI_TESTHELPER_TEST_IO_GRAPH_H__
+
+#include "TestShape.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <stdexcept>
+
+namespace luci
+{
+namespace test
+{
+
+/**
+ * @brief Graphlet with Inputs and loco::Graph for multiple inputs
+ * @note Every Graph will have Input(s) and Output(s)
+ *        We keep loco::Graph only in TestIsGraphlet to avoid declaring a
+ *        separate class just for loco::Graph
+ */
+template <unsigned N> class TestIsGraphlet
+{
+public:
+ TestIsGraphlet()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_inputs[n] = nullptr;
+ _inputs[n] = nullptr;
+ }
+ _g = loco::make_graph();
+ }
+
+public:
+ virtual void init(loco::Graph *g, const std::initializer_list<ShapeU32> shape_in)
+ {
+ if (shape_in.size() != N)
+ throw std::runtime_error("Failed to init TestIsGraphlet");
+
+ auto shpin = shape_in.begin();
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_inputs[n] = g->inputs()->create();
+
+ _inputs[n] = g->nodes()->create<luci::CircleInput>();
+ _inputs[n]->shape(*shpin);
+ _inputs[n]->shape_status(luci::ShapeStatus::VALID);
+ _inputs[n]->dtype(loco::DataType::FLOAT32);
+ _inputs[n]->name("input_" + std::to_string(n));
+
+ _inputs[n]->index(_graph_inputs[n]->index());
+
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ set_shape_vector(input_shape.get(), *shpin);
+ _graph_inputs[n]->shape(std::move(input_shape));
+ _graph_inputs[n]->dtype(loco::DataType::FLOAT32);
+
+ shpin++;
+ }
+ }
+
+public:
+ loco::Graph *g(void) { return _g.get(); }
+ luci::CircleInput *input(int idx) { return _inputs[idx]; }
+ uint32_t num_inputs(void) { return N; }
+
+public:
+ void transfer_to(luci::Module *module)
+ {
+ // WARNING: after _g is transferred, _graph_inputs, _inputs
+ // and _graph_outputs, _outputs in TestOsGraphlet will be invalid.
+ // The arrays are not cleared, as these are just helpers for unit tests.
+ module->add(std::move(_g));
+ }
+
+protected:
+ std::unique_ptr<loco::Graph> _g;
+ std::array<loco::GraphInput *, N> _graph_inputs;
+ std::array<luci::CircleInput *, N> _inputs;
+};
+
+/**
+ * @brief Graphlet with one Input
+ */
+class TestIGraphlet : public TestIsGraphlet<1>
+{
+public:
+ virtual void init(loco::Graph *g, const ShapeU32 shape_in)
+ {
+ TestIsGraphlet<1>::init(g, {shape_in});
+ }
+
+ luci::CircleInput *input() { return _inputs[0]; }
+};
+
+/**
+ * @brief Graphlet with Outputs for multiple outputs
+ */
+template <unsigned N> class TestOsGraphlet
+{
+public:
+ TestOsGraphlet()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_outputs[n] = nullptr;
+ _outputs[n] = nullptr;
+ }
+ }
+
+public:
+ virtual void init(loco::Graph *g, const std::initializer_list<ShapeU32> shape_out)
+ {
+ if (shape_out.size() != N)
+ throw std::runtime_error("Failed to init TestOsGraphlet");
+
+ auto shpout = shape_out.begin();
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_outputs[n] = g->outputs()->create();
+
+ _outputs[n] = g->nodes()->create<luci::CircleOutput>();
+ _outputs[n]->shape(*shpout);
+ _outputs[n]->shape_status(luci::ShapeStatus::VALID);
+ _outputs[n]->dtype(loco::DataType::FLOAT32);
+ _outputs[n]->name("output_" + std::to_string(n));
+
+ _outputs[n]->index(_graph_outputs[n]->index());
+
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ set_shape_vector(output_shape.get(), *shpout);
+ _graph_outputs[n]->shape(std::move(output_shape));
+ _graph_outputs[n]->dtype(loco::DataType::FLOAT32);
+
+ shpout++;
+ }
+ }
+
+public:
+ luci::CircleOutput *output(int idx) { return _outputs[idx]; }
+
+protected:
+ std::array<loco::GraphOutput *, N> _graph_outputs;
+ std::array<luci::CircleOutput *, N> _outputs;
+};
+
+/**
+ * @brief Graphlet with one Output
+ */
+class TestOGraphlet : public TestOsGraphlet<1>
+{
+public:
+ virtual void init(loco::Graph *g, const ShapeU32 shape_out)
+ {
+ TestOsGraphlet<1>::init(g, {shape_out});
+ }
+
+ luci::CircleOutput *output() { return _outputs[0]; }
+};
+
+/**
+ * @brief Graph with Input and Output
+ */
+class TestIOGraph : public TestIGraphlet, public TestOGraphlet
+{
+public:
+ TestIOGraph() = default;
+
+public:
+ virtual void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIGraphlet::init(g(), shape_in);
+ TestOGraphlet::init(g(), shape_out);
+ }
+};
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_TESTHELPER_TEST_IO_GRAPH_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TESTHELPER_TEST_SHAPE_H__
+#define __LUCI_TESTHELPER_TEST_SHAPE_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <initializer_list>
+
+namespace luci
+{
+namespace test
+{
+
+using ShapeU32 = std::initializer_list<uint32_t>;
+using ShapeI32 = std::initializer_list<int32_t>;
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values);
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values);
+
+uint32_t num_elements(const ShapeU32 shape);
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_TESTHELPER_TEST_SHAPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/test/TestIOGraph.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class AddGraphlet
+{
+public:
+ AddGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _add->name("add");
+ }
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+};
+
+class ConvGraphlet
+{
+public:
+ ConvGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _conv = g->nodes()->create<luci::CircleConv2D>();
+ _conv->name("conv");
+ }
+
+protected:
+ luci::CircleConv2D *_conv = nullptr;
+};
+
+} // namespace
+
+namespace
+{
+
+class TestOfTestIOGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ TestOfTestIOGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ SqrtGraphlet::init(g());
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+class TestOfTestI2OGraph : public TestIsGraphlet<2>, public TestOGraphlet, public AddGraphlet
+{
+public:
+ TestOfTestI2OGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIsGraphlet<2>::init(g(), {{2, 3}, {2, 3}});
+ TestOsGraphlet<1>::init(g(), {{2, 3}});
+ AddGraphlet::init(g());
+
+ _add->x(input(0));
+ _add->y(input(1));
+
+ output()->from(_add);
+ }
+};
+
+class TestOfTestI3OGraph : public TestIsGraphlet<3>, public TestOGraphlet, public ConvGraphlet
+{
+public:
+ TestOfTestI3OGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIsGraphlet<3>::init(g(), {{2, 3, 3, 4}, {1, 1}, {4}});
+ TestOsGraphlet<1>::init(g(), {{2, 3, 3, 4}});
+ ConvGraphlet::init(g());
+
+ _conv->input(input(0));
+ _conv->filter(input(1));
+ _conv->bias(input(2));
+
+ output()->from(_conv);
+ }
+};
+
+class FailOfTestI3OGraph : public TestIsGraphlet<3>, public TestOGraphlet, public ConvGraphlet
+{
+public:
+ FailOfTestI3OGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIsGraphlet<3>::init(g(), {{2, 3, 3, 4}, {1, 1}});
+ TestOsGraphlet<1>::init(g(), {{2, 3, 3, 4}});
+ ConvGraphlet::init(g());
+
+ _conv->input(input(0));
+ _conv->filter(input(1));
+ _conv->bias(input(2));
+
+ output()->from(_conv);
+ }
+};
+
+} // namespace
+
+TEST(TestIOGraphTest, IOGraph_init)
+{
+ TestOfTestIOGraph tg;
+ tg.init();
+
+ SUCCEED();
+}
+
+TEST(TestIOGraphTest, I2OGraph_init)
+{
+ TestOfTestI2OGraph tg;
+ tg.init();
+
+ SUCCEED();
+}
+
+TEST(TestIOGraphTest, I3OGraph_init)
+{
+ TestOfTestI3OGraph tg;
+ tg.init();
+
+ SUCCEED();
+}
+
+TEST(TestIOGraphTest, I3OGraph_input_number_mismatch_NEG)
+{
+ FailOfTestI3OGraph fg;
+ EXPECT_THROW(fg.init(), std::runtime_error);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/test/TestShape.h"
+
+/**
+ * @note This file does not hold any test cases but provides helper methods for tests
+ */
+
+namespace luci
+{
+namespace test
+{
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values)
+{
+ uint32_t r = 0;
+ shape->rank(values.size());
+ for (auto v : values)
+ shape->dim(r++).set(v);
+}
+
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values)
+{
+ const_node->rank(1);
+ const_node->dim(0).set(values.size());
+ const_node->shape_status(luci::ShapeStatus::VALID);
+ const_node->dtype(loco::DataType::S32);
+ const_node->size<loco::DataType::S32>(values.size());
+ uint32_t idx = 0;
+ for (auto val : values)
+ const_node->at<loco::DataType::S32>(idx++) = val;
+}
+
+uint32_t num_elements(const ShapeU32 shape)
+{
+ uint32_t result = 1;
+ for (auto val : shape)
+ result = result * val;
+ return result;
+}
+
+} // namespace test
+} // namespace luci
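For orientation, here is a minimal usage sketch of the TestShape helpers defined above; the wrapper function name and the literal shape values are illustrative assumptions, not part of this patch:

#include "luci/test/TestShape.h"

#include <cassert>

// Illustrative sketch only: exercise the helpers above on nodes of an existing graph.
void example_shape_helpers(loco::Graph *g)
{
  // rank-1 S32 constant holding the four values {0, 2, 3, 1}
  auto *perm = g->nodes()->create<luci::CircleConst>();
  luci::test::set_shape_vector(perm, {0, 2, 3, 1});

  // num_elements multiplies the dimensions: 2 * 3 * 4 == 24
  assert(luci::test::num_elements({2, 3, 4}) == 24);
}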
addread(ExpandDims_001)
addread(ExpandDims_002)
addread(ExpandDims_003)
+addread(ExpandDims_004)
+addread(FakeQuant_000)
addread(Fill_000)
addread(Fill_001)
addread(Floor_000)
addread(Shape_000)
addread(Sin_000)
addread(Slice_000)
+addread(Slice_001)
addread(Softmax_000)
addread(Softmax_U8_000)
addread(SpaceToBatchND_000)
addread(Square_000)
addread(SquaredDifference_000)
addread(Squeeze_000)
+addread(Squeeze_001)
addread(StridedSlice_000)
addread(StridedSlice_001)
addread(StridedSlice_002)
addwrite(ExpandDims_001)
addwrite(ExpandDims_002)
addwrite(ExpandDims_003)
+addwrite(ExpandDims_004)
+addwrite(FakeQuant_000)
addwrite(Fill_000)
addwrite(Fill_001)
addwrite(Floor_000)
addwrite(Shape_000)
addwrite(Sin_000)
addwrite(Slice_000)
+addwrite(Slice_001)
addwrite(Softmax_000)
addwrite(Softmax_U8_000)
addwrite(SpaceToBatchND_000)
addwrite(Square_000)
addwrite(SquaredDifference_000)
addwrite(Squeeze_000)
+addwrite(Squeeze_001)
addwrite(StridedSlice_000)
addwrite(StridedSlice_001)
addwrite(StridedSlice_002)
const int32_t shifted_lhs_val = lhs_val * (1 << left_shift);
const int32_t shifted_rhs_val = rhs_val * (1 << left_shift);
const int32_t scaled_lhs_val =
- MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_lhs_val, lhs_multiplier, lhs_shift);
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_lhs_val, lhs_multiplier, lhs_shift);
const int32_t scaled_rhs_val =
- MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_rhs_val, rhs_multiplier, rhs_shift);
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_rhs_val, rhs_multiplier, rhs_shift);
const int32_t raw_sum = scaled_lhs_val + scaled_rhs_val;
const int32_t raw_output =
- MultiplyByQuantizedMultiplierSmallerThanOneExp(raw_sum, output_multiplier, output_shift) +
- output_offset;
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(raw_sum, output_multiplier, output_shift) +
+ output_offset;
const int32_t clamped_output = std::min(output_max, std::max(output_min, raw_output));
res_accessor.at(index) = static_cast<uint8_t>(clamped_output);
}
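As context for the quantized-add hunk just above (a pure re-indentation change): the integer-only code approximates a float add under the usual affine quantization convention q = round(real / scale) + zero_point. A hedged float reference follows; the function and parameter names are illustrative and do not appear in this patch.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative reference only: what the integer path above computes, written with floats.
// The real code uses precomputed quantized multipliers and shifts instead of float scales.
inline uint8_t quantized_add_reference(uint8_t lhs_q, float lhs_scale, int32_t lhs_zero,
                                       uint8_t rhs_q, float rhs_scale, int32_t rhs_zero,
                                       float out_scale, int32_t out_zero)
{
  const float real_sum = lhs_scale * (lhs_q - lhs_zero) + rhs_scale * (rhs_q - rhs_zero);
  const int32_t out_q = static_cast<int32_t>(std::round(real_sum / out_scale)) + out_zero;
  return static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, out_q)));
}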
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
{
auto value = dequantize(arg_accessor.at(index), quant_info);
auto out_value =
- quantize(std::min(std::max(value, 0.0f), cap), result.getType().getQuantization());
+ quantize(std::min(std::max(value, 0.0f), cap), result.getType().getQuantization());
res_accessor.at(index) = out_value;
}
}
};
void ConcatImpl<uint8_t>::run(
- const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
- mir::TensorVariant &output)
+ const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
+ mir::TensorVariant &output)
{
const size_t inputs_count = inputs.size();
std::vector<int32_t> input_zeropoints(inputs_count);
for (int j = 0; j < copy_size; ++j)
{
const int32_t value =
- static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
}
}
if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
{
const std::int32_t in_offset =
- calcOffset(input_shape, batch, in_y, in_x, in_group_offset + in_c);
- const std::int32_t kernel_offset = calcOffset(
- kernel_shape, out_group_offset + out_c, kernel_y, kernel_x, in_c);
+ calcOffset(input_shape, batch, in_y, in_x, in_group_offset + in_c);
+ const std::int32_t kernel_offset =
+ calcOffset(kernel_shape, out_group_offset + out_c, kernel_y, kernel_x, in_c);
const T input_val = input_data[in_offset];
const T kernel_val = kernel_data[kernel_offset];
sum += kernel_val * input_val;
}
const std::int32_t out_offset =
- calcOffset(output_shape, batch, out_y, out_x, out_group_offset + out_c);
+ calcOffset(output_shape, batch, out_y, out_x, out_group_offset + out_c);
result_data[out_offset] = sum;
}
}
for (int32_t out_c = 0; out_c < num_out_channels; ++out_c)
{
const int32_t kernel_offset =
- calcOffset(kernel_shape, in_c, kernel_y, kernel_x, out_c);
+ calcOffset(kernel_shape, in_c, kernel_y, kernel_x, out_c);
const int32_t output_offset =
- calcOffset(output_shape, batch, out_y, out_x, out_c);
+ calcOffset(output_shape, batch, out_y, out_x, out_c);
const T kernel_val = kernel_data[kernel_offset];
output_data[output_offset] += input_val * kernel_val;
}
for (int32_t inner = 0; inner < inner_size; inner++)
{
output.atOffset((outer * num_indices + i) * inner_size + inner) =
- data.atOffset((outer * axis_size + index) * inner_size + inner);
+ data.atOffset((outer * axis_size + index) * inner_size + inner);
}
}
}
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
int left_shift = shift > 0 ? shift : 0;
int right_shift = shift > 0 ? 0 : -shift;
return RoundingDivideByPOT(
- SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
+ SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
}
inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(int32_t x,
mir::Index expsum_index = res_index;
expsum_index.at(axis) = 0;
res_accessor.at(res_index) =
- std::exp(arg_accessor.at(res_index)) / expsum_accessor.at(expsum_index);
+ std::exp(arg_accessor.at(res_index)) / expsum_accessor.at(expsum_index);
}
}
const float prob_rescaled = table_offset[input_data[j]] * inv_sum_exp;
const int32_t prob_quantized = static_cast<int32_t>(prob_rescaled + 0.5);
output_data[j] =
- static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+ static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
}
input_data += last_dim;
output_data += last_dim;
AffineQuantization() = default;
AffineQuantization(float scale, int zero_point)
- : _scale(scale), _zero_point(zero_point), _empty(false)
+ : _scale(scale), _zero_point(zero_point), _empty(false)
{
}
{
class ShapeIter
- : public std::iterator<std::forward_iterator_tag, Index, std::size_t, Index *, Index &>
+ : public std::iterator<std::forward_iterator_tag, Index, std::size_t, Index *, Index &>
{
public:
ShapeIter &operator++()
}
TensorType(DataType element_type, const Shape &shape, const AffineQuantization &quant)
- : _element_type(element_type), _shape(shape), _quantization(quant)
+ : _element_type(element_type), _shape(shape), _quantization(quant)
{
}
{
public:
AvgPool2DOp(Output *arg, const AvgPool2DOpAttributes &attributes)
- : Operation(Type::avgPool2D, {arg}), _attributes(attributes)
+ : Operation(Type::avgPool2D, {arg}), _attributes(attributes)
{
inferOutputTypes();
}
{
public:
ConcatOp(const std::vector<Output *> &args, int32_t axis)
- : Operation(Type::concat, args), _axis(axis)
+ : Operation(Type::concat, args), _axis(axis)
{
inferOutputTypes();
}
{
public:
Conv2DOp(Output *input, Output *kernel, const Conv2DOpAttributes &attributes)
- : Operation(Type::conv2D, {input, kernel}), _attributes(attributes)
+ : Operation(Type::conv2D, {input, kernel}), _attributes(attributes)
{
inferOutputTypes();
}
Conv2DOp(Output *input, Output *kernel, Output *bias, const Conv2DOpAttributes &attributes)
- : Operation(Type::conv2D, {input, kernel, bias}), _attributes(attributes)
+ : Operation(Type::conv2D, {input, kernel, bias}), _attributes(attributes)
{
inferOutputTypes();
}
{
public:
DeConv2DOp(Output *input, Output *kernel, const Deconv2DOpAttributes &attributes)
- : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
+ : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
{
inferOutputTypes();
}
DeConv2DOp(Output *input, Output *kernel, const Deconv2DOpAttributes &attributes,
const Shape &output_shape)
- : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
+ : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
{
assert(input->getElementType() == kernel->getElementType());
setOutputType(0, {input->getElementType(), output_shape});
{
public:
DepthwiseConv2DOp(Output *input, Output *kernel, const Conv2DOpAttributes &attributes)
- : Operation(Type::depthwiseConv, {input, kernel}), _attributes(attributes)
+ : Operation(Type::depthwiseConv, {input, kernel}), _attributes(attributes)
{
inferOutputTypes();
}
DepthwiseConv2DOp(Output *input, Output *kernel, Output *bias,
const Conv2DOpAttributes &attributes)
- : Operation(Type::depthwiseConv, {input, kernel, bias}), _attributes(attributes)
+ : Operation(Type::depthwiseConv, {input, kernel, bias}), _attributes(attributes)
{
inferOutputTypes();
}
{
public:
FullyConnectedOp(Output *input, Output *weights)
- : Operation(Type::fullyConnected, {input, weights})
+ : Operation(Type::fullyConnected, {input, weights})
{
inferOutputTypes();
}
FullyConnectedOp(Output *input, Output *weights, Output *bias)
- : Operation(Type::fullyConnected, {input, weights, bias})
+ : Operation(Type::fullyConnected, {input, weights, bias})
{
inferOutputTypes();
}
{
public:
GatherOp(Output *data, Output *indices, int32_t axis)
- : Operation(Type::gather, {data, indices}), _axis(axis)
+ : Operation(Type::gather, {data, indices}), _axis(axis)
{
inferOutputTypes();
}
{
public:
MaxPool2DOp(Output *arg, const MaxPool2DOpAttributes &attributes)
- : Operation(Type::maxPool2D, {arg}), _attributes(attributes)
+ : Operation(Type::maxPool2D, {arg}), _attributes(attributes)
{
inferOutputTypes();
}
{
public:
PadOp(Output *arg, const PadOpAttributes &attributes)
- : Operation(Type::pad, {arg}), _attributes(attributes)
+ : Operation(Type::pad, {arg}), _attributes(attributes)
{
assert(_attributes.padding_before.size() == _attributes.padding_after.size());
inferOutputTypes();
{
public:
ReduceMeanOp(Output *arg, const std::vector<int> &reduction_dims, bool keep_dims)
- : ReduceOp(Type::reduceMean, arg, reduction_dims, keep_dims)
+ : ReduceOp(Type::reduceMean, arg, reduction_dims, keep_dims)
{
}
{
protected:
ReduceOp(Type type, Output *arg, const std::vector<int> &reduction_dims, bool keep_dims)
- : Operation(type, {arg}), _reduction_dims(reduction_dims), _keep_dims(keep_dims)
+ : Operation(type, {arg}), _reduction_dims(reduction_dims), _keep_dims(keep_dims)
{
inferOutputTypes();
}
};
ResizeOp(Output *arg, ResizeMethod mode, const std::vector<float> &scales)
- : Operation(Type::resizeIm, {arg}), _mode(mode), _scales(scales)
+ : Operation(Type::resizeIm, {arg}), _mode(mode), _scales(scales)
{
// Infer output shape based on given scales.
auto &input_shape = getInputShape(0);
}
ResizeOp(Output *arg, ResizeMethod mode, const Shape &output_shape)
- : Operation(Type::resizeIm, {arg}), _mode(mode)
+ : Operation(Type::resizeIm, {arg}), _mode(mode)
{
// Calculate scales based on given shape.
auto &input_shape = getInputShape(0);
{
public:
SliceOp(Output *arg, const Shape &starts, const Shape &sizes)
- : Operation(Type::slice, {arg}), _starts(starts), _sizes(sizes)
+ : Operation(Type::slice, {arg}), _starts(starts), _sizes(sizes)
{
inferOutputTypes();
}
{
public:
SqueezeOp(Output *arg, const std::vector<std::int32_t> &dims_to_squeeze)
- : Operation(Type::squeeze, {arg}), _dims_to_squeeze(dims_to_squeeze)
+ : Operation(Type::squeeze, {arg}), _dims_to_squeeze(dims_to_squeeze)
{
// Infer output shape.
inferOutputTypes();
if (op->getType() == Operation::Type::input)
_inputs.erase(
- std::remove(_inputs.begin(), _inputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
+ std::remove(_inputs.begin(), _inputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
if (op->getType() == Operation::Type::output)
_outputs.erase(
- std::remove(_outputs.begin(), _outputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
+ std::remove(_outputs.begin(), _outputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
_ops.erase(op);
delete op;
}
Operation::Operation(Type type, const std::vector<Output *> &inputs, std::size_t num_outputs)
- : _type(type)
+ : _type(type)
{
for (std::size_t i = 0; i < inputs.size(); ++i)
{
for (int i = 0; i < num_dims; ++i)
{
const std::int32_t lhs_dim =
- (i >= num_dims - lhs_shape.rank()) ? lhs_shape.dim(i - (num_dims - lhs_shape.rank())) : 1;
+ (i >= num_dims - lhs_shape.rank()) ? lhs_shape.dim(i - (num_dims - lhs_shape.rank())) : 1;
const std::int32_t rhs_dim =
- (i >= num_dims - rhs_shape.rank()) ? rhs_shape.dim(i - (num_dims - rhs_shape.rank())) : 1;
+ (i >= num_dims - rhs_shape.rank()) ? rhs_shape.dim(i - (num_dims - rhs_shape.rank())) : 1;
if (lhs_dim == 1)
{
result_shape.dim(i) = rhs_dim;
}
TensorVariant::TensorVariant(DataType element_type, const Shape &shape)
- : TensorVariant(TensorType(element_type, shape))
+ : TensorVariant(TensorType(element_type, shape))
{
}
}
TensorVariant::TensorVariant(DataType element_type, const Shape &shape, const void *data)
- : TensorVariant(TensorType(element_type, shape), data)
+ : TensorVariant(TensorType(element_type, shape), data)
{
}
* @param shape shape to broadcast to
*/
TensorVariant::TensorVariant(const TensorVariant &t_old, const Shape &shape)
- : _type(t_old.getType().getElementType(), shape), _data(t_old._data),
- _strides(static_cast<size_t>(shape.rank())), _element_size(t_old._element_size)
+ : _type(t_old.getType().getElementType(), shape), _data(t_old._data),
+ _strides(static_cast<size_t>(shape.rank())), _element_size(t_old._element_size)
{
int axis_old = t_old.getShape().rank() - 1;
for (int d = shape.rank() - 1; d >= 0; d--)
Caffe2Importer::Caffe2Importer(std::string predict_net, std::string init_net,
const std::vector<std::vector<int>> &input_shapes)
- : _predictNet(std::move(predict_net)), _initNet(std::move(init_net))
+ : _predictNet(std::move(predict_net)), _initNet(std::move(init_net))
{
for (auto &shape : input_shapes)
_inputShapes.emplace_back(shape);
}
const std::map<std::string, SupportedCaffe2OpType> Caffe2Importer::_operatorTypes = {
- {"Add", SupportedCaffe2OpType::add},
- {"AveragePool", SupportedCaffe2OpType::averagePool},
- {"Conv", SupportedCaffe2OpType::conv},
- {"Concat", SupportedCaffe2OpType::concat},
- {"ConstantFill", SupportedCaffe2OpType::constantFill},
- {"Dropout", SupportedCaffe2OpType::dropout},
- {"FC", SupportedCaffe2OpType::FC},
- {"GivenTensorFill", SupportedCaffe2OpType::givenTensorFill},
- {"MaxPool", SupportedCaffe2OpType::maxPool},
- {"Mul", SupportedCaffe2OpType::mul},
- {"Relu", SupportedCaffe2OpType::relu},
- {"ResizeNearest", SupportedCaffe2OpType::resizeNearest},
- {"Sigmoid", SupportedCaffe2OpType::sigmoid},
- {"Softmax", SupportedCaffe2OpType::softmax},
- {"SpatialBN", SupportedCaffe2OpType::spatialBN},
- {"Sum", SupportedCaffe2OpType::sum},
- {"Clip", SupportedCaffe2OpType::clip},
- {"Reshape", SupportedCaffe2OpType::reshape},
- {"GivenTensorInt64Fill", SupportedCaffe2OpType::givenTensorInt64Fill},
+ {"Add", SupportedCaffe2OpType::add},
+ {"AveragePool", SupportedCaffe2OpType::averagePool},
+ {"Conv", SupportedCaffe2OpType::conv},
+ {"Concat", SupportedCaffe2OpType::concat},
+ {"ConstantFill", SupportedCaffe2OpType::constantFill},
+ {"Dropout", SupportedCaffe2OpType::dropout},
+ {"FC", SupportedCaffe2OpType::FC},
+ {"GivenTensorFill", SupportedCaffe2OpType::givenTensorFill},
+ {"MaxPool", SupportedCaffe2OpType::maxPool},
+ {"Mul", SupportedCaffe2OpType::mul},
+ {"Relu", SupportedCaffe2OpType::relu},
+ {"ResizeNearest", SupportedCaffe2OpType::resizeNearest},
+ {"Sigmoid", SupportedCaffe2OpType::sigmoid},
+ {"Softmax", SupportedCaffe2OpType::softmax},
+ {"SpatialBN", SupportedCaffe2OpType::spatialBN},
+ {"Sum", SupportedCaffe2OpType::sum},
+ {"Clip", SupportedCaffe2OpType::clip},
+ {"Reshape", SupportedCaffe2OpType::reshape},
+ {"GivenTensorInt64Fill", SupportedCaffe2OpType::givenTensorInt64Fill},
};
-}
+} // namespace
namespace mir_caffe2
{
{
int is_global_pooling = getSingleArgument(op, "global_pooling", 0);
bool has_custom_kernel_size =
- hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
+ hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
bool has_custom_kernels_size = hasArgument(op.arg(), "kernels");
int kernel_h(0), kernel_w(0);
if (has_custom_pad && hasArgument(op.arg(), "pad"))
throw std::runtime_error("Custom pad can't be combined with overall pad");
- if (has_custom_pad &&
- !(hasArgument(op.arg(), "pad_l") && hasArgument(op.arg(), "pad_r") &&
- hasArgument(op.arg(), "pad_t") && hasArgument(op.arg(), "pad_b")))
+ if (has_custom_pad && !(hasArgument(op.arg(), "pad_l") && hasArgument(op.arg(), "pad_r") &&
+ hasArgument(op.arg(), "pad_t") && hasArgument(op.arg(), "pad_b")))
throw std::runtime_error("If one custom pad specified - all custom pads must be specified");
// Kernel size
bool has_custom_kernel_size =
- hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
+ hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
if (has_custom_kernel_size && hasArgument(op.arg(), "kernel"))
throw std::runtime_error("Custom kernel size can't be combined with overall kernel size");
if (has_custom_kernel_size &&
!(hasArgument(op.arg(), "kernel_h") && hasArgument(op.arg(), "kernel_w")))
throw std::runtime_error(
- "If one custom kernel size specified - all custom kernel sizes must be specified");
+ "If one custom kernel size specified - all custom kernel sizes must be specified");
}
static mir::TensorVariant createTensor(const OperatorDef &op)
auto reshape = createOp<ops::ReshapeOp>(inputs[0], shape)->getOutput(0);
auto weights =
- createOp<ops::TransposeOp>(inputs[1], std::vector<std::size_t>{1, 0})->getOutput(0);
+ createOp<ops::TransposeOp>(inputs[1], std::vector<std::size_t>{1, 0})->getOutput(0);
auto result = createOp<ops::FullyConnectedOp>(reshape, weights)->getOutput(0);
result = createOp<ops::AddOp>(result, inputs[2])->getOutput(0);
scales[2] = getSingleArgument(op, "height_scale", 1.0f);
scales[3] = getSingleArgument(op, "width_scale", 1.0f);
auto result =
- createOp<ops::ResizeOp>(inputs[0], ops::ResizeOp::ResizeMethod::nearestNeighbor, scales)
- ->getOutput(0);
+ createOp<ops::ResizeOp>(inputs[0], ops::ResizeOp::ResizeMethod::nearestNeighbor, scales)
+ ->getOutput(0);
return {result};
}
// Sanity checks
if (op.input_size() != 5)
throw std::runtime_error(
- "SpatialBN must have exactly 5 inputs ('sums' and 'sumsq' are not supported yet)");
+ "SpatialBN must have exactly 5 inputs ('sums' and 'sumsq' are not supported yet)");
if (getSingleArgument(op, "is_test", 1) != 1)
throw std::runtime_error("SpatialBN: only test mode supported");
auto var_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[4]->getNode());
if (scale_op == nullptr || bias_op == nullptr || mean_op == nullptr || var_op == nullptr)
throw std::runtime_error(
- "SpatialBN: non-constant 'scale', 'bias', 'mean' and 'var' inputs are not supported yet.");
+ "SpatialBN: non-constant 'scale', 'bias', 'mean' and 'var' inputs are not supported yet.");
const auto &scale_tensor = scale_op->getValue();
const auto &bias_tensor = bias_op->getValue();
}
const std::map<std::string, CaffeOpType> CaffeImporter::_operatorTypes = {
- {"AbsVal", CaffeOpType::absVal},
- {"Accuracy", CaffeOpType::accuracy},
- {"ArgMax", CaffeOpType::argMax},
- {"BatchNorm", CaffeOpType::batchNorm},
- {"BatchReindex", CaffeOpType::batchReindex},
- {"Bias", CaffeOpType::bias},
- {"BNLL", CaffeOpType::BNLL},
- {"Clip", CaffeOpType::clip},
- {"Concat", CaffeOpType::concat},
- {"ContrastiveLoss", CaffeOpType::contrastiveLoss},
- {"Convolution", CaffeOpType::convolution},
- {"Crop", CaffeOpType::crop},
- {"Data", CaffeOpType::data},
- {"Deconvolution", CaffeOpType::deconvolution},
- {"Dropout", CaffeOpType::dropout},
- {"DummyData", CaffeOpType::dummyData},
- {"Eltwise", CaffeOpType::eltwise},
- {"ELU", CaffeOpType::ELU},
- {"Embed", CaffeOpType::embed},
- {"EuclidianLoss", CaffeOpType::euclidianLoss},
- {"Exp", CaffeOpType::exp},
- {"Filter", CaffeOpType::filter},
- {"Flatten", CaffeOpType::flatten},
- {"HDF5Data", CaffeOpType::HDF5Data},
- {"HDF5Output", CaffeOpType::HDF5Output},
- {"HingeLoss", CaffeOpType::hingeLoss},
- {"Im2Col", CaffeOpType::im2Col},
- {"ImageData", CaffeOpType::imageData},
- {"InfogainLoss", CaffeOpType::infogainLoss},
- {"InnerProduct", CaffeOpType::innerProduct},
- {"Input", CaffeOpType::input},
- {"Log", CaffeOpType::log},
- {"LRN", CaffeOpType::LRN},
- {"LSTM", CaffeOpType::LSTM},
- {"MemoryData", CaffeOpType::memoryData},
- {"MultinomialLogisticLoss", CaffeOpType::multinomialLogisticLoss},
- {"MVN", CaffeOpType::MVN},
- {"Parameter", CaffeOpType::parameter},
- {"Pooling", CaffeOpType::pooling},
- {"Power", CaffeOpType::power},
- {"PReLU", CaffeOpType::PReLU},
- {"Python", CaffeOpType::python},
- {"Recurrent", CaffeOpType::recurrent},
- {"Reduction", CaffeOpType::reduction},
- {"ReLU", CaffeOpType::ReLU},
- {"Reshape", CaffeOpType::reshape},
- {"RNN", CaffeOpType::RNN},
- {"Scale", CaffeOpType::scale},
- {"SigmoidCrossEntropyLoss", CaffeOpType::sigmoidCrossEntropyLoss},
- {"Sigmoid", CaffeOpType::sigmoid},
- {"Silence", CaffeOpType::silence},
- {"Softmax", CaffeOpType::softmax},
- {"SoftmaxWithLoss", CaffeOpType::softmaxWithLoss},
- {"SPP", CaffeOpType::SPP},
- {"Split", CaffeOpType::split},
- {"Slice", CaffeOpType::slice},
- {"TanH", CaffeOpType::tanh},
- {"Threshold", CaffeOpType::threshold},
- {"Tile", CaffeOpType::tile},
- {"WindowData", CaffeOpType::windowData}};
+ {"AbsVal", CaffeOpType::absVal},
+ {"Accuracy", CaffeOpType::accuracy},
+ {"ArgMax", CaffeOpType::argMax},
+ {"BatchNorm", CaffeOpType::batchNorm},
+ {"BatchReindex", CaffeOpType::batchReindex},
+ {"Bias", CaffeOpType::bias},
+ {"BNLL", CaffeOpType::BNLL},
+ {"Clip", CaffeOpType::clip},
+ {"Concat", CaffeOpType::concat},
+ {"ContrastiveLoss", CaffeOpType::contrastiveLoss},
+ {"Convolution", CaffeOpType::convolution},
+ {"Crop", CaffeOpType::crop},
+ {"Data", CaffeOpType::data},
+ {"Deconvolution", CaffeOpType::deconvolution},
+ {"Dropout", CaffeOpType::dropout},
+ {"DummyData", CaffeOpType::dummyData},
+ {"Eltwise", CaffeOpType::eltwise},
+ {"ELU", CaffeOpType::ELU},
+ {"Embed", CaffeOpType::embed},
+ {"EuclidianLoss", CaffeOpType::euclidianLoss},
+ {"Exp", CaffeOpType::exp},
+ {"Filter", CaffeOpType::filter},
+ {"Flatten", CaffeOpType::flatten},
+ {"HDF5Data", CaffeOpType::HDF5Data},
+ {"HDF5Output", CaffeOpType::HDF5Output},
+ {"HingeLoss", CaffeOpType::hingeLoss},
+ {"Im2Col", CaffeOpType::im2Col},
+ {"ImageData", CaffeOpType::imageData},
+ {"InfogainLoss", CaffeOpType::infogainLoss},
+ {"InnerProduct", CaffeOpType::innerProduct},
+ {"Input", CaffeOpType::input},
+ {"Log", CaffeOpType::log},
+ {"LRN", CaffeOpType::LRN},
+ {"LSTM", CaffeOpType::LSTM},
+ {"MemoryData", CaffeOpType::memoryData},
+ {"MultinomialLogisticLoss", CaffeOpType::multinomialLogisticLoss},
+ {"MVN", CaffeOpType::MVN},
+ {"Parameter", CaffeOpType::parameter},
+ {"Pooling", CaffeOpType::pooling},
+ {"Power", CaffeOpType::power},
+ {"PReLU", CaffeOpType::PReLU},
+ {"Python", CaffeOpType::python},
+ {"Recurrent", CaffeOpType::recurrent},
+ {"Reduction", CaffeOpType::reduction},
+ {"ReLU", CaffeOpType::ReLU},
+ {"Reshape", CaffeOpType::reshape},
+ {"RNN", CaffeOpType::RNN},
+ {"Scale", CaffeOpType::scale},
+ {"SigmoidCrossEntropyLoss", CaffeOpType::sigmoidCrossEntropyLoss},
+ {"Sigmoid", CaffeOpType::sigmoid},
+ {"Silence", CaffeOpType::silence},
+ {"Softmax", CaffeOpType::softmax},
+ {"SoftmaxWithLoss", CaffeOpType::softmaxWithLoss},
+ {"SPP", CaffeOpType::SPP},
+ {"Split", CaffeOpType::split},
+ {"Slice", CaffeOpType::slice},
+ {"TanH", CaffeOpType::tanh},
+ {"Threshold", CaffeOpType::threshold},
+ {"Tile", CaffeOpType::tile},
+ {"WindowData", CaffeOpType::windowData}};
} // namespace
std::unique_ptr<mir::Graph> importModelFromBinaryFile(const std::string &filename)
{
// Assuming NCHW format.
const std::int32_t padded_input =
- input_shape.dim(2 + i) + attributes.padding_before[i] + attributes.padding_after[i];
+ input_shape.dim(2 + i) + attributes.padding_before[i] + attributes.padding_after[i];
if ((padded_input - attributes.window[i]) % attributes.strides[i] != 0)
++attributes.padding_after[i];
}
auto input = createOp<ops::TransposeOp>(inputs[0], std::vector<std::size_t>{0, 2, 3, 1});
auto softmax = createOp<ops::SoftmaxOp>(input->getOutput(0), axis);
auto result =
- createOp<ops::TransposeOp>(softmax->getOutput(0), std::vector<std::size_t>{0, 3, 1, 2});
+ createOp<ops::TransposeOp>(softmax->getOutput(0), std::vector<std::size_t>{0, 3, 1, 2});
return {result->getOutput(0)};
}
c_t = createOp<ops::AddOp>(createOp<ops::MulOp>(c_cont_t, f_t)->getOutput(0),
createOp<ops::MulOp>(i_t, g_t)->getOutput(0))
- ->getOutput(0);
+ ->getOutput(0);
h_t = createOp<ops::MulOp>(createOp<ops::TanhOp>(c_t)->getOutput(0), o_t)->getOutput(0);
h_slices[t] = h_t;
{
const auto &attributes = node.attribute();
const auto it = std::find_if(
- attributes.cbegin(), attributes.cend(),
- [&name](const onnx::AttributeProto &attribute) { return attribute.name() == name; });
+ attributes.cbegin(), attributes.cend(),
+ [&name](const onnx::AttributeProto &attribute) { return attribute.name() == name; });
if (it == attributes.cend())
return nullptr;
return &*it;
// Assuming input has NCHW format.
const std::int32_t residual = input_shape.dim(2 + i) % strides[i];
const std::int32_t total_pad = std::max(
- INT32_C(0), residual == 0 ? eff_window_size - strides[i] : eff_window_size - residual);
+ INT32_C(0), residual == 0 ? eff_window_size - strides[i] : eff_window_size - residual);
if (pad_type == "SAME_UPPER")
{
padding_before[i] = total_pad / 2;
}
bool is_foldable =
- std::all_of(op->getInputs().begin(), op->getInputs().end(), [](mir::Operation::Output *out) {
- return out->getNode()->getType() == mir::Operation::Type::constant;
- });
+ std::all_of(op->getInputs().begin(), op->getInputs().end(), [](mir::Operation::Output *out) {
+ return out->getNode()->getType() == mir::Operation::Type::constant;
+ });
if (!is_foldable)
return op;
auto opset = _modelCtx->getDomainOpsetVersion(onnx_node.domain());
NodeConverterRegistry::ConverterFunc converter =
- NodeConverterRegistry::getInstance().lookup(op_type, opset);
+ NodeConverterRegistry::getInstance().lookup(op_type, opset);
if (converter == nullptr)
problems_op_set.emplace(op_type, opset);
}
auto elem_type = onnxDataTypeToMirDataType(
- (onnx::TensorProto_DataType)input.type().tensor_type().elem_type());
+ (onnx::TensorProto_DataType)input.type().tensor_type().elem_type());
mir::TensorType type{elem_type, shape};
auto *op = _graph->create<mir::ops::InputOp>(type);
_converterCtx->setOutput(input.name(), op->getOutput(0));
auto opset = _modelCtx->getDomainOpsetVersion(onnx_node.domain());
// Get converter
NodeConverterRegistry::ConverterFunc converter =
- NodeConverterRegistry::getInstance().lookup(op_type, opset);
+ NodeConverterRegistry::getInstance().lookup(op_type, opset);
assert(converter != nullptr);
converter(onnx_node, _converterCtx.get());
}
const VersionMap &conv_map = it->second;
auto res = std::lower_bound(
- conv_map.crbegin(), conv_map.crend(), opset,
- [](const VersionMap::value_type &pair, int64_t opset) { return pair.first > opset; });
+ conv_map.crbegin(), conv_map.crend(), opset,
+ [](const VersionMap::value_type &pair, int64_t opset) { return pair.first > opset; });
if (res == conv_map.crend())
{
constexpr int num_spatial_dims = 2;
const auto strides =
- getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
if (strides.size() != num_spatial_dims)
throw std::runtime_error("AveragePool: attribute 'strides' has incorrect size.");
if (scale_op == nullptr || mean_op == nullptr || var_op == nullptr)
throw std::runtime_error(
- "BatchNormalization: only constant 'scale', 'mean' and 'variance' inputs are supported.");
+ "BatchNormalization: only constant 'scale', 'mean' and 'variance' inputs are supported.");
mir::Tensor<float> scale_accessor(scale_op->getValue());
mir::Tensor<float> mean_accessor(mean_op->getValue());
{
auto bias = inputs[2];
bias = createOp<mir::ops::ReshapeOp>(graph, bias, mir::Shape{1, bias->getShape().dim(0), 1, 1})
- ->getOutput(0);
+ ->getOutput(0);
result = createOp<mir::ops::AddOp>(graph, result, bias)->getOutput(0);
}
constexpr int num_spatial_dims = 2;
const auto dilations =
- getAttributeValue(onnx_node, "dilations", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "dilations", std::vector<std::int32_t>(num_spatial_dims, 1));
if (dilations.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'dilations' has incorrect size.");
if (!std::all_of(dilations.cbegin(), dilations.cend(), [](std::int32_t x) { return x == 1; }))
throw std::runtime_error("ConvTranspose: attribute 'dilations' has unsupported value.");
const auto strides =
- getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
if (strides.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'strides' has incorrect size.");
- const auto output_padding = getAttributeValue(onnx_node, "output_padding",
- std::vector<std::int32_t>(num_spatial_dims, 0));
+ const auto output_padding =
+ getAttributeValue(onnx_node, "output_padding", std::vector<std::int32_t>(num_spatial_dims, 0));
if (output_padding.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'output_padding' has incorrect size.");
if (!std::all_of(output_padding.cbegin(), output_padding.cend(),
// Assuming kernel has IOHW format.
assert(kernel->getShape().rank() == 4);
const auto kernel_size = getAttributeValue(
- onnx_node, "kernel_shape",
- std::vector<std::int32_t>{kernel->getShape().dim(2), kernel->getShape().dim(3)});
+ onnx_node, "kernel_shape",
+ std::vector<std::int32_t>{kernel->getShape().dim(2), kernel->getShape().dim(3)});
if (kernel_size.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'kernel_shape' has incorrect size.");
attributes.strides = strides;
attributes.data_format = mir::DataFormat::NCHW;
attributes.padding_type = mir::ops::PaddingType::SameUpper;
- result = createOp<mir::ops::DeConv2DOp>(graph, input, kernel, attributes, output_shape)
- ->getOutput(0);
+ result =
+ createOp<mir::ops::DeConv2DOp>(graph, input, kernel, attributes, output_shape)->getOutput(0);
}
else
{
// TODO This code was not tested.
throw std::runtime_error(
- "ConvTranspose: absence of attribute 'output_shape' is not supported.");
+ "ConvTranspose: absence of attribute 'output_shape' is not supported.");
std::vector<std::int32_t> padding_before(num_spatial_dims, 0);
std::vector<std::int32_t> padding_after(num_spatial_dims, 0);
if (const auto *pads_attr = findAttribute(onnx_node, "pads"))
{
auto bias = inputs[2];
bias = createOp<mir::ops::ReshapeOp>(graph, bias, mir::Shape{1, bias->getShape().dim(0), 1, 1})
- ->getOutput(0);
+ ->getOutput(0);
result = createOp<mir::ops::AddOp>(graph, result, bias)->getOutput(0);
}
constexpr int num_spatial_dims = 2;
const auto strides =
- getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
if (strides.size() != num_spatial_dims)
throw std::runtime_error("MaxPool: attribute 'strides' has incorrect size.");
mir::Graph *graph = context->getGraph();
auto result =
- createOp<mir::ops::ReduceMeanOp>(graph, inputs[0], reduce_dims, keep_dims)->getOutput(0);
+ createOp<mir::ops::ReduceMeanOp>(graph, inputs[0], reduce_dims, keep_dims)->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
scales_vector.at(3) = w_scale;
auto result =
- createOp<mir::ops::ResizeOp>(graph, inputs[0],
- mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
- ->getOutput(0);
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
if (scales_attr->floats_size() != inputs[0]->getShape().rank())
throw std::runtime_error(
- "Number of elements of scales should be the same as the rank of input");
+ "Number of elements of scales should be the same as the rank of input");
assert(inputs[0]->getShape().rank() == 4 && "Only rank 4 is supported");
std::vector<float> scales_vector(4);
scales_vector.at(3) = scales_attr->floats(3);
auto result =
- createOp<mir::ops::ResizeOp>(graph, inputs[0],
- mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
- ->getOutput(0);
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
scales_vector[i] = scales_tensor.atOffset(i);
auto result =
- createOp<mir::ops::ResizeOp>(graph, inputs[0],
- mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
- ->getOutput(0);
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
}
static const std::set<tflite::BuiltinOperator> supportedOperators = {
- tflite::BuiltinOperator_ADD,
- tflite::BuiltinOperator_AVERAGE_POOL_2D,
- tflite::BuiltinOperator_CONCATENATION,
- tflite::BuiltinOperator_CONV_2D,
- tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
- tflite::BuiltinOperator_DIV,
- tflite::BuiltinOperator_FULLY_CONNECTED,
- tflite::BuiltinOperator_HARD_SWISH,
- tflite::BuiltinOperator_LEAKY_RELU,
- tflite::BuiltinOperator_LOGISTIC,
- tflite::BuiltinOperator_MAX_POOL_2D,
- tflite::BuiltinOperator_MAXIMUM,
- tflite::BuiltinOperator_MEAN,
- tflite::BuiltinOperator_MUL,
- tflite::BuiltinOperator_PAD,
- tflite::BuiltinOperator_RELU,
- tflite::BuiltinOperator_RELU6,
- tflite::BuiltinOperator_RESHAPE,
- tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
- tflite::BuiltinOperator_RSQRT,
- tflite::BuiltinOperator_SHAPE,
- tflite::BuiltinOperator_SLICE,
- tflite::BuiltinOperator_SOFTMAX,
- tflite::BuiltinOperator_SQRT,
- tflite::BuiltinOperator_SQUARED_DIFFERENCE,
- tflite::BuiltinOperator_SQUEEZE,
- tflite::BuiltinOperator_STRIDED_SLICE,
- tflite::BuiltinOperator_SUB,
- tflite::BuiltinOperator_TANH,
- tflite::BuiltinOperator_TRANSPOSE,
- tflite::BuiltinOperator_TRANSPOSE_CONV,
+ tflite::BuiltinOperator_ADD,
+ tflite::BuiltinOperator_AVERAGE_POOL_2D,
+ tflite::BuiltinOperator_CONCATENATION,
+ tflite::BuiltinOperator_CONV_2D,
+ tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
+ tflite::BuiltinOperator_DIV,
+ tflite::BuiltinOperator_FULLY_CONNECTED,
+ tflite::BuiltinOperator_HARD_SWISH,
+ tflite::BuiltinOperator_LEAKY_RELU,
+ tflite::BuiltinOperator_LOGISTIC,
+ tflite::BuiltinOperator_MAX_POOL_2D,
+ tflite::BuiltinOperator_MAXIMUM,
+ tflite::BuiltinOperator_MEAN,
+ tflite::BuiltinOperator_MUL,
+ tflite::BuiltinOperator_PAD,
+ tflite::BuiltinOperator_RELU,
+ tflite::BuiltinOperator_RELU6,
+ tflite::BuiltinOperator_RESHAPE,
+ tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+ tflite::BuiltinOperator_RSQRT,
+ tflite::BuiltinOperator_SHAPE,
+ tflite::BuiltinOperator_SLICE,
+ tflite::BuiltinOperator_SOFTMAX,
+ tflite::BuiltinOperator_SQRT,
+ tflite::BuiltinOperator_SQUARED_DIFFERENCE,
+ tflite::BuiltinOperator_SQUEEZE,
+ tflite::BuiltinOperator_STRIDED_SLICE,
+ tflite::BuiltinOperator_SUB,
+ tflite::BuiltinOperator_TANH,
+ tflite::BuiltinOperator_TRANSPOSE,
+ tflite::BuiltinOperator_TRANSPOSE_CONV,
};
void TfliteImporter::collectUnsupportedOps()
outputs = _opCreator->convertConv2D(op->builtin_options.AsConv2DOptions(), inputs);
break;
case tflite::BuiltinOperator_DEPTHWISE_CONV_2D:
- outputs = _opCreator->convertDepthwiseConv2D(op->builtin_options.AsDepthwiseConv2DOptions(),
- inputs);
+ outputs =
+ _opCreator->convertDepthwiseConv2D(op->builtin_options.AsDepthwiseConv2DOptions(), inputs);
break;
case tflite::BuiltinOperator_MAX_POOL_2D:
outputs = _opCreator->convertMaxPool2D(op->builtin_options.AsPool2DOptions(), inputs);
break;
case tflite::BuiltinOperator_CONCATENATION:
outputs =
- _opCreator->convertConcatenation(op->builtin_options.AsConcatenationOptions(), inputs);
+ _opCreator->convertConcatenation(op->builtin_options.AsConcatenationOptions(), inputs);
break;
case tflite::BuiltinOperator_RESHAPE:
outputs = _opCreator->convertReshape(op->builtin_options.AsReshapeOptions(), inputs);
break;
case tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
outputs = _opCreator->convertResizeNearestNeighbor(
- op->builtin_options.AsResizeNearestNeighborOptions(), inputs);
+ op->builtin_options.AsResizeNearestNeighborOptions(), inputs);
break;
case tflite::BuiltinOperator_MEAN:
outputs = _opCreator->convertMean(op->builtin_options.AsReducerOptions(), inputs);
break;
case tflite::BuiltinOperator_FULLY_CONNECTED:
outputs =
- _opCreator->convertFullyConnected(op->builtin_options.AsFullyConnectedOptions(), inputs);
+ _opCreator->convertFullyConnected(op->builtin_options.AsFullyConnectedOptions(), inputs);
break;
case tflite::BuiltinOperator_SOFTMAX:
outputs = _opCreator->convertSoftmax(op->builtin_options.AsSoftmaxOptions(), inputs);
break;
case tflite::BuiltinOperator_TRANSPOSE_CONV:
outputs =
- _opCreator->convertTransposeConv(op->builtin_options.AsTransposeConvOptions(), inputs);
+ _opCreator->convertTransposeConv(op->builtin_options.AsTransposeConvOptions(), inputs);
break;
case tflite::BuiltinOperator_PAD:
outputs = _opCreator->convertPad(op->builtin_options.AsPadOptions(), inputs);
break;
case tflite::BuiltinOperator_STRIDED_SLICE:
outputs =
- _opCreator->convertStridedSlice(op->builtin_options.AsStridedSliceOptions(), inputs);
+ _opCreator->convertStridedSlice(op->builtin_options.AsStridedSliceOptions(), inputs);
break;
case tflite::BuiltinOperator_LEAKY_RELU:
outputs = _opCreator->convertLeakyReLU(op->builtin_options.AsLeakyReluOptions(), inputs);
{
// Assuming NHWC format.
const std::int32_t total_padding =
- (input_shape.dim(1 + i) % strides[i] == 0)
- ? std::max(0, window_size[i] - strides[i])
- : std::max(0, window_size[i] - input_shape.dim(1 + i) % strides[i]);
+ (input_shape.dim(1 + i) % strides[i] == 0)
+ ? std::max(0, window_size[i] - strides[i])
+ : std::max(0, window_size[i] - input_shape.dim(1 + i) % strides[i]);
padding_before[i] = total_padding / 2;
padding_after[i] = total_padding - padding_before[i];
}
Shape res_shape{input_shape.dim(0), size_tensor.at(mir::Index{0}), size_tensor.at(mir::Index{1}),
input_shape.dim(3)};
auto result =
- createOp<ops::ResizeOp>(input, ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
+ createOp<ops::ResizeOp>(input, ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
return {result->getOutput(0)};
}
// (in_size - window_size + 1 + stride - 1) / stride =
// (in_size - window_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
+ (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
// (in_size - kernel_size + 1 + stride - 1) / stride =
// (in_size - kernel_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - kernel_shape.dim(1 + i)) / _attributes.strides[i] + 1;
+ (padded_input - kernel_shape.dim(1 + i)) / _attributes.strides[i] + 1;
}
auto dt = getInput(0)->getElementType();
{
const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
const std::int32_t total_padding =
- (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
- output_shape.dim(spatial_dim_index);
+ (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
+ output_shape.dim(spatial_dim_index);
switch (_attributes.padding_type)
{
{
const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
output_shape.dim(spatial_dim_index) =
- (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
- (_attributes.padding_before.at(i) + _attributes.padding_after.at(i));
+ (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
+ (_attributes.padding_before.at(i) + _attributes.padding_after.at(i));
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
// (in_size - kernel_size + 1 + stride - 1) / stride =
// (in_size - kernel_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - kernel_shape.dim(i)) / _attributes.strides[i] + 1;
+ (padded_input - kernel_shape.dim(i)) / _attributes.strides[i] + 1;
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
// (in_size - window_size + 1 + stride - 1) / stride =
// (in_size - window_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
+ (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
for (int32_t dim = 0; dim < num_dims; ++dim)
{
out_shape.dim(dim) =
- _attributes.padding_before[dim] + input_shape.dim(dim) + _attributes.padding_after[dim];
+ _attributes.padding_before[dim] + input_shape.dim(dim) + _attributes.padding_after[dim];
}
setOutputType(0, {getInput(0)->getElementType(), out_shape});
{
TransposeOp::TransposeOp(Output *arg, const std::vector<std::size_t> &axis_order)
- : Operation(Type::transpose, {arg}), _axis_order(axis_order)
+ : Operation(Type::transpose, {arg}), _axis_order(axis_order)
{
assert(_axis_order.size() == static_cast<std::size_t>(getInputShape(0).rank()));
inferOutputTypes();
Shape output_shape(input_shape.rank());
for (std::size_t i = 0; i < _axis_order.size(); ++i)
output_shape.dim(static_cast<std::int64_t>(i)) =
- input_shape.dim(static_cast<int32_t>(_axis_order.at(i)));
+ input_shape.dim(static_cast<int32_t>(_axis_order.at(i)));
setOutputType(0, {getInput(0)->getElementType(), output_shape});
}
auto input = g.create<ops::InputOp>(input_type);
auto op =
- g.create<ops::ResizeOp>(input->getOutput(0), ops::ResizeOp::ResizeMethod::nearestNeighbor,
- std::vector<float>{1, 6, 2, 1});
+ g.create<ops::ResizeOp>(input->getOutput(0), ops::ResizeOp::ResizeMethod::nearestNeighbor,
+ std::vector<float>{1, 6, 2, 1});
ASSERT_EQ(result_shape, op->getOutputShape(0));
}
template <typename... Args>
explicit ParamType(int32_t actual_len, Args &&... args)
- : actual_length(actual_len), shape({static_cast<int32_t>(args)...})
+ : actual_length(actual_len), shape({static_cast<int32_t>(args)...})
{
}
};
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
loco::AvgPool2D *pool_node = dynamic_cast<loco::AvgPool2D *>(loco_graph->nodes()->at(2));
loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
+ dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(4));
ASSERT_NE(pull_node, nullptr);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
loco::MaxPool2D *pool_node = dynamic_cast<loco::MaxPool2D *>(loco_graph->nodes()->at(2));
loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
+ dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(4));
ASSERT_NE(pull_node, nullptr);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::Reshape<loco::ReshapeType::Fixed> *reshape_node =
- dynamic_cast<loco::Reshape<loco::ReshapeType::Fixed> *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::Reshape<loco::ReshapeType::Fixed> *>(loco_graph->nodes()->at(1));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(2));
ASSERT_NE(pull_node, nullptr);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1));
loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(2));
+ dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(2));
loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(loco_graph->nodes()->at(3));
loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(loco_graph->nodes()->at(4));
loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(5));
+ dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(5));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(6));
ASSERT_NE(pull_node, nullptr);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::TensorSoftmax *softmax_node =
- dynamic_cast<loco::TensorSoftmax *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::TensorSoftmax *>(loco_graph->nodes()->at(1));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(2));
ASSERT_NE(pull_node, nullptr);
attributes.padding_after = {7, 4};
auto *conv =
- mir_graph.create<mir::ops::DepthwiseConv2DOp>(input, filter, attributes)->getOutput(0);
+ mir_graph.create<mir::ops::DepthwiseConv2DOp>(input, filter, attributes)->getOutput(0);
mir_graph.create<mir::ops::OutputOp>(conv);
input->setName("x");
loco::DepthwiseConv2D *dw_conv_node = dynamic_cast<loco::DepthwiseConv2D *>(*encode_uses.begin());
ASSERT_NE(dw_conv_node, nullptr);
loco::DepthwiseFilterEncode *filter_node =
- dynamic_cast<loco::DepthwiseFilterEncode *>(dw_conv_node->ker());
+ dynamic_cast<loco::DepthwiseFilterEncode *>(dw_conv_node->ker());
ASSERT_NE(filter_node, nullptr);
ASSERT_EQ(dw_conv_node->ifm(), encode_node);
// Check params
auto encode_uses = loco::succs(encode_node);
ASSERT_EQ(encode_uses.size(), 1);
loco::TransposedConv2D *tr_conv_node =
- dynamic_cast<loco::TransposedConv2D *>(*encode_uses.begin());
+ dynamic_cast<loco::TransposedConv2D *>(*encode_uses.begin());
ASSERT_NE(tr_conv_node, nullptr);
loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(tr_conv_node->ker());
ASSERT_NE(filter_node, nullptr);
mir::TensorType input_type{mir::DataType::FLOAT32, {2, 7, 9, 5}};
auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
auto *transpose =
- mir_graph.create<mir::ops::TransposeOp>(input, std::vector<std::size_t>{3, 0, 1, 2})
- ->getOutput(0);
+ mir_graph.create<mir::ops::TransposeOp>(input, std::vector<std::size_t>{3, 0, 1, 2})
+ ->getOutput(0);
mir_graph.create<mir::ops::OutputOp>(transpose);
input->setName("x");
transpose->setName("y");
target_include_directories(moco_log PUBLIC include)
target_link_libraries(moco_log PUBLIC hermes)
target_link_libraries(moco_log PRIVATE hermes_std)
-target_link_libraries(moco_log PRIVATE stdex)
install(TARGETS moco_log DESTINATION lib)
#include "moco/Log.h"
#include <hermes/ConsoleReporter.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace moco
{
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<LoggerConfig>());
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<LoggerConfig>());
}
return ctx;
target_link_libraries(moco_tf_frontend PRIVATE bino)
target_link_libraries(moco_tf_frontend PRIVATE fipe)
target_link_libraries(moco_tf_frontend PRIVATE locop)
-target_link_libraries(moco_tf_frontend PRIVATE stdex)
target_link_libraries(moco_tf_frontend PRIVATE moco_log)
target_link_libraries(moco_tf_frontend PRIVATE pepper_str)
target_link_libraries(moco_tf_frontend PRIVATE pepper_strcast)
target_link_libraries(moco_tf_frontend_test locop)
target_link_libraries(moco_tf_frontend_test moco_log)
target_link_libraries(moco_tf_frontend_test moco_tf_frontend)
-target_link_libraries(moco_tf_frontend_test stdex)
target_link_libraries(moco_tf_frontend_test plier_tf)
target_link_libraries(moco_tf_frontend_test locoex_customop)
target_link_libraries(moco_tf_frontend_test logo)
require("loco")
require("moco")
require("locop")
-require("stdex")
require("moco-log")
require("pepper-strcast")
require("locomotiv")
* This mimics "tf.broadcast_to" API in TensorFlow.
*/
static inline auto broadcast_to(const loco::TensorShape &shape)
- -> decltype(bino::transform_both(std::declval<BroadcastFunctor>()))
+ -> decltype(bino::transform_both(std::declval<BroadcastFunctor>()))
{
return bino::transform_both(BroadcastFunctor{shape});
}
#include <loco/Service/ShapeInference.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
namespace
void set_filter_enc(loco::FilterEncode *filter_enc)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
// In TensorFlow, Conv2dBackpropInput's filter is a 4-D tensor of following shape:
// [filter_height, filter_width, out_channels, in_channels] or HWOI or HWNC (in/out in loco sense)
// 'tight fit' output. When output size (set by 'input sizes' node input) is
// larger than tight fit, extra spaces filled with zero.
auto tight_output_vertical = tight_output_for_valid_padding(
- input().vertical.value(), stride().vertical(), window().vertical());
+ input().vertical.value(), stride().vertical(), window().vertical());
auto tight_output_horizontal = tight_output_for_valid_padding(
- input().horizontal.value(), stride().horizontal(), window().horizontal());
+ input().horizontal.value(), stride().horizontal(), window().horizontal());
if (output().vertical.value() < tight_output_vertical or
output().horizontal.value() < tight_output_horizontal)
auto whole_pad_vertical = padding_needed(input().vertical.value(), output().vertical.value(),
stride().vertical(), window().vertical());
auto whole_pad_horizontal =
- padding_needed(input().horizontal.value(), output().horizontal.value(),
- stride().horizontal(), window().horizontal());
+ padding_needed(input().horizontal.value(), output().horizontal.value(), stride().horizontal(),
+ window().horizontal());
loco::Padding2D res;
void set_filter_enc(loco::FilterEncode *filter_enc)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
// In TensorFlow, conv2d filter is a 4-D tensor of following shape:
// [filter_height, filter_width, in_channels, out_channels] -> HWIO (HWCN)
void set_filter_enc(loco::DepthwiseFilterEncode *filter_enc)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>();
// In TensorFlow, depthwiseconv2dnative filter is a 4-D tensor of following shape:
// [filter_height, filter_width, in_channels, channel_multiplier] -> HWCM
LOGGER(l);
/**
- * @note This will replace TFDepthwiseConv2dNative node with Canonical FeatureEncode +
- * DepthwiseFilterEncode + DepthwiseConv2D + FeatureDecode
- *
- * Before
- * A -+- TFDepthwiseConv2dNative - C
- * |
- * B -+
- *
- * After
- *
- * A -+ FeatureEncode ----------------+- DepthwiseConv2D - FeatureDecode - C
- * | |
- * +-(TFDepthwiseConv2dNative) |
- * | |
- * B -+ DepthwiseFilterEncode --------+
- *
- * Where
- * A : ifm of TFDepthwiseConv2dNative
- * B : ker of TFDepthwiseConv2dNative
- * C : a node that uses TFDepthwiseConv2dNative as an input
- * TFDepthwiseConv2dNative is disconnected from other nodes
- */
+ * @note This will replace TFDepthwiseConv2dNative node with Canonical FeatureEncode +
+ * DepthwiseFilterEncode + DepthwiseConv2D + FeatureDecode
+ *
+ * Before
+ * A -+- TFDepthwiseConv2dNative - C
+ * |
+ * B -+
+ *
+ * After
+ *
+ * A -+ FeatureEncode ----------------+- DepthwiseConv2D - FeatureDecode - C
+ * | |
+ * +-(TFDepthwiseConv2dNative) |
+ * | |
+ * B -+ DepthwiseFilterEncode --------+
+ *
+ * Where
+ * A : ifm of TFDepthwiseConv2dNative
+ * B : ker of TFDepthwiseConv2dNative
+ * C : a node that uses TFDepthwiseConv2dNative as an input
+ * TFDepthwiseConv2dNative is disconnected from other nodes
+ */
INFO(l) << "TFNodeCanonicalize TFDepthwiseConv2dNative begin";
#include "loco/Service/TypeInference.h"
-#include <stdex/Memory.h>
-
namespace
{
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
#include <loco/Service/TypeInference.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
namespace
INFO(l) << "TFNodeCanonicalize TFSoftmax begin";
/**
- * This will replace shape inferred TFSoftmax node into canonical TensorSoftmax
- *
- * Before
- * In ---- TFSoftmax ---- Out(s)
- *
- * After
- * ------ TFSoftmax
- * /
- * In ---- TensorSoftmax ----- Out(s)
- */
+ * This will replace shape inferred TFSoftmax node into canonical TensorSoftmax
+ *
+ * Before
+ * In ---- TFSoftmax ---- Out(s)
+ *
+ * After
+ * ------ TFSoftmax
+ * /
+ * In ---- TensorSoftmax ----- Out(s)
+ */
auto nodeshape = moco::node_shape(node);
// Canonicalization into TensorSoftmax is valid when softmax has shape info
{
/**
-* @brief Canonicalize TF-dialect TFSoftmax into canonical Softmax node
-*/
+ * @brief Canonicalize TF-dialect TFSoftmax into canonical Softmax node
+ */
class SoftmaxCanonicalizer : public SimpleNodeTransform<moco::TFSoftmax>
{
public:
INFO(l) << "TFNodeCanonicalize TFStopGradient begin";
/**
- * This will replace shape inferred TFStopGradient node into canonical Forward
- *
- * Before
- * In --- TFStopGradient --- Out(s)
- *
- * After
- * -- TFStopGradient
- * /
- * In --- Forward --- Out(s)
- */
+ * This will replace shape inferred TFStopGradient node into canonical Forward
+ *
+ * Before
+ * In --- TFStopGradient --- Out(s)
+ *
+ * After
+ * -- TFStopGradient
+ * /
+ * In --- Forward --- Out(s)
+ */
// Create loco node to replace
auto forward_node = graph->nodes()->create<loco::Forward>();
{
/**
-* @brief Canonicalize TF-dialect TFStopGradient into canonical Forward node
-*/
+ * @brief Canonicalize TF-dialect TFStopGradient into canonical Forward node
+ */
class StopGradientCanonicalizer : public SimpleNodeTransform<moco::TFStopGradient>
{
public:
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
#include <logo/Phase.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
/* TRANSFORM DECLARATION BEGIN */
// Run shape and type inference at the top
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
- phase.emplace_back(stdex::make_unique<TypeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<TypeInferencePass>());
- phase.emplace_back(stdex::make_unique<AddCanonicalizer>());
- phase.emplace_back(stdex::make_unique<AvgPoolCanonicalizer>());
+ phase.emplace_back(std::make_unique<AddCanonicalizer>());
+ phase.emplace_back(std::make_unique<AvgPoolCanonicalizer>());
if (moco::tf::get<moco::tf::Knob::CanonicalizeBiasAdd>())
- phase.emplace_back(stdex::make_unique<BiasAddCanonicalizer>());
- phase.emplace_back(stdex::make_unique<ConcatV2Canonicalizer>());
+ phase.emplace_back(std::make_unique<BiasAddCanonicalizer>());
+ phase.emplace_back(std::make_unique<ConcatV2Canonicalizer>());
if (moco::tf::get<moco::tf::Knob::CanonicalizeConst>())
- phase.emplace_back(stdex::make_unique<ConstCanonicalizer>());
- phase.emplace_back(stdex::make_unique<Conv2DBackpropInputCanonicalizer>());
+ phase.emplace_back(std::make_unique<ConstCanonicalizer>());
+ phase.emplace_back(std::make_unique<Conv2DBackpropInputCanonicalizer>());
if (moco::tf::get<moco::tf::Knob::CanonicalizeConv2D>())
- phase.emplace_back(stdex::make_unique<Conv2DCanonicalizer>());
- phase.emplace_back(stdex::make_unique<DepthwiseConv2dNativeCanonicalizer>());
- phase.emplace_back(stdex::make_unique<IdentityCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MaximumCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MaxPoolCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MeanCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MulCanonicalizer>());
- phase.emplace_back(stdex::make_unique<PadCanonicalizer>());
- phase.emplace_back(stdex::make_unique<PlaceholderCanonicalizer>());
- phase.emplace_back(stdex::make_unique<RealDivCanonicalizer>());
- phase.emplace_back(stdex::make_unique<ReluCanonicalizer>());
- phase.emplace_back(stdex::make_unique<Relu6Canonicalizer>());
- phase.emplace_back(stdex::make_unique<ReshapeCanonicalizer>());
- phase.emplace_back(stdex::make_unique<RsqrtCanonicalizer>());
- phase.emplace_back(stdex::make_unique<SoftmaxCanonicalizer>());
- phase.emplace_back(stdex::make_unique<SqrtCanonicalizer>());
+ phase.emplace_back(std::make_unique<Conv2DCanonicalizer>());
+ phase.emplace_back(std::make_unique<DepthwiseConv2dNativeCanonicalizer>());
+ phase.emplace_back(std::make_unique<IdentityCanonicalizer>());
+ phase.emplace_back(std::make_unique<MaximumCanonicalizer>());
+ phase.emplace_back(std::make_unique<MaxPoolCanonicalizer>());
+ phase.emplace_back(std::make_unique<MeanCanonicalizer>());
+ phase.emplace_back(std::make_unique<MulCanonicalizer>());
+ phase.emplace_back(std::make_unique<PadCanonicalizer>());
+ phase.emplace_back(std::make_unique<PlaceholderCanonicalizer>());
+ phase.emplace_back(std::make_unique<RealDivCanonicalizer>());
+ phase.emplace_back(std::make_unique<ReluCanonicalizer>());
+ phase.emplace_back(std::make_unique<Relu6Canonicalizer>());
+ phase.emplace_back(std::make_unique<ReshapeCanonicalizer>());
+ phase.emplace_back(std::make_unique<RsqrtCanonicalizer>());
+ phase.emplace_back(std::make_unique<SoftmaxCanonicalizer>());
+ phase.emplace_back(std::make_unique<SqrtCanonicalizer>());
// NOTE SquaredDifference is handled in ResolveSquaredDifference
- phase.emplace_back(stdex::make_unique<SqueezeCanonicalizer>());
- phase.emplace_back(stdex::make_unique<StopGradientCanonicalizer>());
- phase.emplace_back(stdex::make_unique<SubCanonicalizer>());
- phase.emplace_back(stdex::make_unique<TanhCanonicalizer>());
+ phase.emplace_back(std::make_unique<SqueezeCanonicalizer>());
+ phase.emplace_back(std::make_unique<StopGradientCanonicalizer>());
+ phase.emplace_back(std::make_unique<SubCanonicalizer>());
+ phase.emplace_back(std::make_unique<TanhCanonicalizer>());
// For virtual nodes
- phase.emplace_back(stdex::make_unique<TFPushCanonicalizer>());
+ phase.emplace_back(std::make_unique<TFPushCanonicalizer>());
/* TRANSFORM DECLARATION END */
ProgressReporter prog(g, logo::PhaseStrategy::Restart);
#define __CODEC_HELPER_H__
#include <plier/tf/Convert.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
void set_feature_enc(loco::FeatureEncode *feature_enc, DataLayout data_layout)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
if (data_layout == DataLayout::NHWC)
{
void set_feature_dec(loco::FeatureDecode *feature_dec, DataLayout data_layout)
{
- auto dec = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ auto dec = std::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
if (data_layout == DataLayout::NHWC)
{
#include <loco/Service/ShapeInference.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>
+#include <memory>
#include <iostream>
#include <sstream>
#include <fstream>
for (const auto &custom_op : sig.customops())
{
std::unique_ptr<moco::tf::COpCallGraphBuilder> builder =
- stdex::make_unique<moco::tf::COpCallGraphBuilder>(&sig);
+ std::make_unique<moco::tf::COpCallGraphBuilder>(&sig);
registry.add(custom_op, std::move(builder));
}
auto input = graph->inputs()->at(n);
auto input_node = moco::placeholder_node(graph.get(), n);
assert(input_node != nullptr);
- input->shape(stdex::make_unique<loco::TensorShape>(tensor_shape(input_node)));
+ input->shape(std::make_unique<loco::TensorShape>(tensor_shape(input_node)));
}
for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
auto output = graph->outputs()->at(n);
auto output_node = moco::push_node(graph.get(), n);
assert(output_node != nullptr);
- output->shape(stdex::make_unique<loco::TensorShape>(::tensor_shape(output_node)));
+ output->shape(std::make_unique<loco::TensorShape>(::tensor_shape(output_node)));
}
// Convert graph to hold only Canonical dialect
namespace tf
{
-#define KNOB_BOOL(NAME, DEFAULT, DESC) \
- template <> typename KnobTrait<Knob::NAME>::ValueType get<Knob::NAME>(void) \
- { \
- static typename KnobTrait<Knob::NAME>::ValueType value = \
- ::knob_load<typename KnobTrait<Knob::NAME>::ValueType>(::knob_loader(), #NAME, DEFAULT); \
- return value; \
+#define KNOB_BOOL(NAME, DEFAULT, DESC) \
+ template <> typename KnobTrait<Knob::NAME>::ValueType get<Knob::NAME>(void) \
+ { \
+ static typename KnobTrait<Knob::NAME>::ValueType value = \
+ ::knob_load<typename KnobTrait<Knob::NAME>::ValueType>(::knob_loader(), #NAME, DEFAULT); \
+ return value; \
}
#include "Knob.lst"
#undef KNOB_BOOL
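// The KNOB_BOOL definition above is an X-macro: including "Knob.lst" while the macro is live
// stamps out one get<Knob::NAME>() specialization per listed knob, each caching its value in a
// function-local static on first use. Illustrative expansion only — the entry and default below
// are examples; the actual entries live in Knob.lst:
//
//   KNOB_BOOL(ConstantFolding, true, "Enable constant folding")
//
// expands to
//
//   template <> typename KnobTrait<Knob::ConstantFolding>::ValueType get<Knob::ConstantFolding>(void)
//   {
//     static typename KnobTrait<Knob::ConstantFolding>::ValueType value =
//       ::knob_load<typename KnobTrait<Knob::ConstantFolding>::ValueType>(::knob_loader(),
//                                                                         "ConstantFolding", true);
//     return value;
//   }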
FormattedGraph fmt(loco::Graph *g)
{
- auto node_summary_builder = stdex::make_unique<TFNodeSummaryBuilderFactory>();
+ auto node_summary_builder = std::make_unique<TFNodeSummaryBuilderFactory>();
return std::move(locop::fmt<locop::LinearV1>(g).with(std::move(node_summary_builder)));
}
#include <moco/Names.h>
#include <moco/tf/Frontend.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
+#include <memory>
#include <vector>
#include <cassert>
#include <stdexcept>
{
public:
COpCallGraphUpdate(locoex::COpCall *node, const std::vector<moco::TensorName> &input_names)
- : _node(node), _input_names(input_names)
+ : _node(node), _input_names(input_names)
{
}
if (val.value_case() == tensorflow::AttrValue::kF)
{
- call_node->attr(name, stdex::make_unique<locoex::COpAttrFloat>(val.f()));
+ call_node->attr(name, std::make_unique<locoex::COpAttrFloat>(val.f()));
}
else if (val.value_case() == tensorflow::AttrValue::kI)
{
- call_node->attr(name, stdex::make_unique<locoex::COpAttrInt>(val.i()));
+ call_node->attr(name, std::make_unique<locoex::COpAttrInt>(val.i()));
}
// TODO define more types
else
{
input_names.emplace_back(TensorName(tf_node.input(i)));
}
- auto update = stdex::make_unique<COpCallGraphUpdate>(call_node, input_names);
+ auto update = std::make_unique<COpCallGraphUpdate>(call_node, input_names);
updates->enroll(std::move(update));
}
class COpCallGraphBuilder final : public GraphBuilder
{
public:
- COpCallGraphBuilder(const ModelSignature *signature) : _signature(signature) { /* empty */}
+ COpCallGraphBuilder(const ModelSignature *signature) : _signature(signature)
+ { /* empty */
+ }
bool validate(const tensorflow::NodeDef &) const override;
void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
#include <loco.h>
#include <plier/tf/TestHelper.h>
-#include <stdex/Memory.h>
#include <gtest/gtest.h>
+#include <memory>
+
using namespace moco::tf::test;
namespace
// import
moco::GraphBuilderRegistry registry{&moco::GraphBuilderRegistry::get()};
- registry.add("new_custom_op", stdex::make_unique<moco::tf::COpCallGraphBuilder>(&signature));
+ registry.add("new_custom_op", std::make_unique<moco::tf::COpCallGraphBuilder>(&signature));
moco::Importer importer(&registry);
std::unique_ptr<loco::Graph> graph = importer.import(signature, graph_def);
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
/* TRANSFORM DECLARATION BEGIN */
// Shape inference is required for ResolveRedundantReshape
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
if (moco::tf::get<moco::tf::Knob::ConstantFolding>())
{
- phase.emplace_back(stdex::make_unique<logo::ConstantFoldingPass>());
+ phase.emplace_back(std::make_unique<logo::ConstantFoldingPass>());
}
if (moco::tf::get<moco::tf::Knob::RemoveDeadNode>())
{
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
if (moco::tf::get<moco::tf::Knob::ReorderDecode>() &&
moco::tf::get<moco::tf::Knob::ReorderDecodeTensorBiasAdd>())
{
- phase.emplace_back(stdex::make_unique<logo::ReorderDecodePass<loco::TensorBiasAdd>>());
+ phase.emplace_back(std::make_unique<logo::ReorderDecodePass<loco::TensorBiasAdd>>());
}
if (moco::tf::get<moco::tf::Knob::ReorderDecode>() &&
moco::tf::get<moco::tf::Knob::ReorderDecodeReLU>())
{
- phase.emplace_back(stdex::make_unique<logo::ReorderDecodePass<loco::ReLU>>());
+ phase.emplace_back(std::make_unique<logo::ReorderDecodePass<loco::ReLU>>());
}
if (moco::tf::get<moco::tf::Knob::SimplifyDomainConversion>())
{
- phase.emplace_back(stdex::make_unique<logo::SimplifyDomainConversionPass>());
+ phase.emplace_back(std::make_unique<logo::SimplifyDomainConversionPass>());
}
if (moco::tf::get<moco::tf::Knob::RemoveForwardNode>())
{
- phase.emplace_back(stdex::make_unique<logo::RemoveForwardNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveForwardNodePass>());
}
if (moco::tf::get<moco::tf::Knob::ResolveDuplicateReshape>())
{
- phase.emplace_back(stdex::make_unique<logo::ResolveDuplicateReshapePass>());
+ phase.emplace_back(std::make_unique<logo::ResolveDuplicateReshapePass>());
}
if (moco::tf::get<moco::tf::Knob::ResolveRedundantReshape>())
{
- phase.emplace_back(stdex::make_unique<logo::ResolveRedundantReshapePass>());
+ phase.emplace_back(std::make_unique<logo::ResolveRedundantReshapePass>());
}
/* TRANSFORM DECLARATION END */
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
#include <locop/FormattedGraph.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
public:
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tlb) const final
{
- return stdex::make_unique<MocoNodeSummaryBuilder>(tlb);
+ return std::make_unique<MocoNodeSummaryBuilder>(tlb);
}
};
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
/* TRANSFORM DECLARATION BEGIN */
if (moco::tf::get<moco::tf::Knob::ResolveFusedBatchNorm>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveFusedBatchNorm>());
+ phase.emplace_back(std::make_unique<moco::ResolveFusedBatchNorm>());
}
if (moco::tf::get<moco::tf::Knob::FuseBinaryIntoPreceding>())
{
- phase.emplace_back(stdex::make_unique<moco::FuseBinaryIntoPreceding>());
+ phase.emplace_back(std::make_unique<moco::FuseBinaryIntoPreceding>());
}
if (moco::tf::get<moco::tf::Knob::ResolveConstantShape>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveConstantShape>());
+ phase.emplace_back(std::make_unique<moco::ResolveConstantShape>());
}
if (moco::tf::get<moco::tf::Knob::ResolveReshapeWildcardDim>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveReshapeWildcardDim>());
+ phase.emplace_back(std::make_unique<moco::ResolveReshapeWildcardDim>());
}
if (moco::tf::get<moco::tf::Knob::ResolveSquaredDifference>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveSquaredDifference>());
+ phase.emplace_back(std::make_unique<moco::ResolveSquaredDifference>());
}
if (moco::tf::get<moco::tf::Knob::RemoveTFIdentityNode>())
{
- phase.emplace_back(stdex::make_unique<moco::RemoveTFIdentityNode>());
+ phase.emplace_back(std::make_unique<moco::RemoveTFIdentityNode>());
}
if (moco::tf::get<moco::tf::Knob::RemoveDeadNode>())
{
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
if (moco::tf::get<moco::tf::Knob::SqueezeReduceNode>())
{
- phase.emplace_back(stdex::make_unique<moco::SqueezeReduceNode>());
+ phase.emplace_back(std::make_unique<moco::SqueezeReduceNode>());
}
// Shape inference is needed for added nodes doing above transformations
- phase.emplace_back(stdex::make_unique<moco::tf::ShapeInferencePass>());
- phase.emplace_back(stdex::make_unique<moco::tf::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<moco::tf::ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<moco::tf::TypeInferencePass>());
/* TRANSFORM DECLARATION END */
ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
#include <moco/IR/Nodes/TFConst.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
TFNodeBuildTester::TFNodeBuildTester()
{
_graph = loco::make_graph();
- _tensor_names = stdex::make_unique<moco::SymbolTable>();
+ _tensor_names = std::make_unique<moco::SymbolTable>();
}
void TFNodeBuildTester::inputs(const std::vector<std::string> &names)
{
assert(_output != nullptr);
- auto node_defs = stdex::make_unique<moco::NodeDefTable>();
- auto updates = stdex::make_unique<moco::UpdateQueue>();
+ auto node_defs = std::make_unique<moco::NodeDefTable>();
+ auto updates = std::make_unique<moco::UpdateQueue>();
moco::GraphBuilderContext gb_context(_graph.get(), node_defs.get(), _tensor_names.get(),
updates.get());
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(TFDialect::get(), &tf_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(TFDialect::get(), &tf_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
return loco::apply(&rules).to(graph);
}
loco::MultiDialectTypeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(TFDialect::get(), &tf_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(TFDialect::get(), &tf_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
loco::apply(&rules).to(graph);
target_include_directories(moco_import PUBLIC include)
target_link_libraries(moco_import PUBLIC moco_lang)
target_link_libraries(moco_import PUBLIC mio_tf)
-target_link_libraries(moco_import PUBLIC stdex)
target_link_libraries(moco_import PRIVATE nncc_common)
target_link_libraries(moco_import PRIVATE plier_tf)
target_link_libraries(moco_import PRIVATE oops)
public:
GraphBuilderContext(loco::Graph *g, NodeDefTable *nodedef, SymbolTable *tensor_names,
UpdateQueue *updates)
- : _g(g), _nodedef(nodedef), _tensor_names(tensor_names), _updates(updates)
+ : _g(g), _nodedef(nodedef), _tensor_names(tensor_names), _updates(updates)
{
// DO NOTHING
}
std::map<const std::string, std::unique_ptr<GraphBuilder>> _builder_map;
};
-} // namespace mono
+} // namespace moco
#endif // __MOCO_IMPORT_GRAPH_BUILDER_REGISTRY_H__
{
/**
-* @brief GraphBuilder for Softmax node
-*/
+ * @brief GraphBuilder for Softmax node
+ */
class SoftmaxGraphBuilder final : public GraphBuilder
{
public:
#include "moco/Import/GraphBuilderRegistry.h"
#include "moco/Import/Nodes.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
GraphBuilderRegistry::GraphBuilderRegistry()
{
- add("Add", stdex::make_unique<AddGraphBuilder>());
- add("AvgPool", stdex::make_unique<AvgPoolGraphBuilder>());
- add("BiasAdd", stdex::make_unique<BiasAddGraphBuilder>());
- add("ConcatV2", stdex::make_unique<ConcatV2GraphBuilder>());
- add("Const", stdex::make_unique<ConstGraphBuilder>());
- add("Conv2D", stdex::make_unique<Conv2DGraphBuilder>());
- add("Conv2DBackpropInput", stdex::make_unique<Conv2DBackpropInputGraphBuilder>());
- add("DepthwiseConv2dNative", stdex::make_unique<DepthwiseConv2dNativeGraphBuilder>());
- add("FakeQuantWithMinMaxVars", stdex::make_unique<FakeQuantWithMinMaxVarsGraphBuilder>());
- add("FusedBatchNorm", stdex::make_unique<FusedBatchNormGraphBuilder>());
- add("Identity", stdex::make_unique<IdentityGraphBuilder>());
- add("Maximum", stdex::make_unique<MaximumGraphBuilder>());
- add("MaxPool", stdex::make_unique<MaxPoolGraphBuilder>());
- add("Mean", stdex::make_unique<MeanGraphBuilder>());
- add("Mul", stdex::make_unique<MulGraphBuilder>());
- add("Pack", stdex::make_unique<PackGraphBuilder>());
- add("Pad", stdex::make_unique<PadGraphBuilder>());
- add("Placeholder", stdex::make_unique<PlaceholderGraphBuilder>());
- add("RealDiv", stdex::make_unique<RealDivGraphBuilder>());
- add("Relu", stdex::make_unique<ReluGraphBuilder>());
- add("Relu6", stdex::make_unique<Relu6GraphBuilder>());
- add("Reshape", stdex::make_unique<ReshapeGraphBuilder>());
- add("Rsqrt", stdex::make_unique<RsqrtGraphBuilder>());
- add("Shape", stdex::make_unique<ShapeGraphBuilder>());
- add("Softmax", stdex::make_unique<SoftmaxGraphBuilder>());
- add("Sqrt", stdex::make_unique<SqrtGraphBuilder>());
- add("SquaredDifference", stdex::make_unique<SquaredDifferenceGraphBuilder>());
- add("Squeeze", stdex::make_unique<SqueezeGraphBuilder>());
- add("StopGradient", stdex::make_unique<StopGradientGraphBuilder>());
- add("StridedSlice", stdex::make_unique<StridedSliceGraphBuilder>());
- add("Sub", stdex::make_unique<SubGraphBuilder>());
- add("Tanh", stdex::make_unique<TanhGraphBuilder>());
+ add("Add", std::make_unique<AddGraphBuilder>());
+ add("AvgPool", std::make_unique<AvgPoolGraphBuilder>());
+ add("BiasAdd", std::make_unique<BiasAddGraphBuilder>());
+ add("ConcatV2", std::make_unique<ConcatV2GraphBuilder>());
+ add("Const", std::make_unique<ConstGraphBuilder>());
+ add("Conv2D", std::make_unique<Conv2DGraphBuilder>());
+ add("Conv2DBackpropInput", std::make_unique<Conv2DBackpropInputGraphBuilder>());
+ add("DepthwiseConv2dNative", std::make_unique<DepthwiseConv2dNativeGraphBuilder>());
+ add("FakeQuantWithMinMaxVars", std::make_unique<FakeQuantWithMinMaxVarsGraphBuilder>());
+ add("FusedBatchNorm", std::make_unique<FusedBatchNormGraphBuilder>());
+ add("Identity", std::make_unique<IdentityGraphBuilder>());
+ add("Maximum", std::make_unique<MaximumGraphBuilder>());
+ add("MaxPool", std::make_unique<MaxPoolGraphBuilder>());
+ add("Mean", std::make_unique<MeanGraphBuilder>());
+ add("Mul", std::make_unique<MulGraphBuilder>());
+ add("Pack", std::make_unique<PackGraphBuilder>());
+ add("Pad", std::make_unique<PadGraphBuilder>());
+ add("Placeholder", std::make_unique<PlaceholderGraphBuilder>());
+ add("RealDiv", std::make_unique<RealDivGraphBuilder>());
+ add("Relu", std::make_unique<ReluGraphBuilder>());
+ add("Relu6", std::make_unique<Relu6GraphBuilder>());
+ add("Reshape", std::make_unique<ReshapeGraphBuilder>());
+ add("Rsqrt", std::make_unique<RsqrtGraphBuilder>());
+ add("Shape", std::make_unique<ShapeGraphBuilder>());
+ add("Softmax", std::make_unique<SoftmaxGraphBuilder>());
+ add("Sqrt", std::make_unique<SqrtGraphBuilder>());
+ add("SquaredDifference", std::make_unique<SquaredDifferenceGraphBuilder>());
+ add("Squeeze", std::make_unique<SqueezeGraphBuilder>());
+ add("StopGradient", std::make_unique<StopGradientGraphBuilder>());
+ add("StridedSlice", std::make_unique<StridedSliceGraphBuilder>());
+ add("Sub", std::make_unique<SubGraphBuilder>());
+ add("Tanh", std::make_unique<TanhGraphBuilder>());
// Virtual node like `TFPush` need not to be added here
}
#include <moco/IR/Nodes/TFPlaceholder.h>
#include <moco/IR/TFNode.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <sstream>
#include <stdexcept>
void convert_graph(const moco::GraphBuilderSource &source, const moco::ModelSignature &signature,
tensorflow::GraphDef &tf_graph_def, loco::Graph *graph)
{
- auto nodedef = stdex::make_unique<moco::NodeDefTable>();
- auto tensor_names = stdex::make_unique<moco::SymbolTable>();
- auto updates = stdex::make_unique<moco::UpdateQueue>();
+ auto nodedef = std::make_unique<moco::NodeDefTable>();
+ auto tensor_names = std::make_unique<moco::SymbolTable>();
+ auto updates = std::make_unique<moco::UpdateQueue>();
moco::GraphBuilderContext gb_context(graph, nodedef.get(), tensor_names.get(), updates.get());
#include <moco/IR/Nodes/TFAdd.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
add_input_names.push_back(TensorName(node.input(0))); // x
add_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_add_update = stdex::make_unique<TFAddGraphUpdate>(tf_add, add_input_names);
+ auto tf_add_update = std::make_unique<TFAddGraphUpdate>(tf_add, add_input_names);
updates->enroll(std::move(tf_add_update));
}
#include "Convert.h"
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
{
public:
TFAvgPoolGraphUpdate(TFAvgPool *node, const TensorName &name)
- : _avgpool_node(node), _value_name(name)
+ : _avgpool_node(node), _value_name(name)
{
}
tensor_names->enroll(output_name, avgPool_node);
// Record ifm inputs to featureEncode_node
- auto update = stdex::make_unique<TFAvgPoolGraphUpdate>(avgPool_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFAvgPoolGraphUpdate>(avgPool_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <loco.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <vector>
{
public:
TFBiasAddGraphUpdate(TFBiasAdd *biasadd, std::vector<TensorName> &names)
- : _biasadd(biasadd), _names(names)
+ : _biasadd(biasadd), _names(names)
{
}
input_names.push_back(TensorName(node.input(0)));
input_names.push_back(TensorName(node.input(1)));
- auto update = stdex::make_unique<TFBiasAddGraphUpdate>(tf_bias_add, input_names);
+ auto update = std::make_unique<TFBiasAddGraphUpdate>(tf_bias_add, input_names);
updates->enroll(std::move(update));
}
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
#include <cassert>
namespace
{
public:
TFConcatV2GraphUpdate(TFConcatV2 *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, concat_node);
- auto update = stdex::make_unique<TFConcatV2GraphUpdate>(concat_node, input_names);
+ auto update = std::make_unique<TFConcatV2GraphUpdate>(concat_node, input_names);
updates->enroll(std::move(update));
}
read_value_float32(const_node, num_elements, input_tensor);
break;
- // TODO support other types
+ // TODO support other types
default:
assert(false);
#include <loco.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
#include <algorithm>
input_names.push_back(TensorName(node.input(1))); // kernel
// Record ifm inputs to featureEncode_node
- auto tfconv2d_update = stdex::make_unique<TFConv2DGraphUpdate>(conv2d, input_names);
+ auto tfconv2d_update = std::make_unique<TFConv2DGraphUpdate>(conv2d, input_names);
updates->enroll(std::move(tfconv2d_update));
}
#include "Convert.h"
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
+
namespace
{
using namespace moco;
{
public:
Conv2DBackpropInputGraphUpdate(TFConv2DBackpropInput *node, std::vector<TensorName> names)
- : _node(node), _input_names(names)
+ : _node(node), _input_names(names)
{
// DO NOTHING
}
// update
auto conv2d_backprop_update =
- stdex::make_unique<Conv2DBackpropInputGraphUpdate>(conv2d_backprop, input_names);
+ std::make_unique<Conv2DBackpropInputGraphUpdate>(conv2d_backprop, input_names);
updates->enroll(std::move(conv2d_backprop_update));
}
#include <plier/tf/Convert.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
using namespace plier::tf;
{
public:
TFDepthwiseConv2dNativeGraphUpdate(TFDepthwiseConv2dNative *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
input_names.push_back(TensorName(node.input(1))); // kernel
// Record ifm inputs to featureEncode_node
- auto tfdepthwiseconv2dnative_update = stdex::make_unique<TFDepthwiseConv2dNativeGraphUpdate>(
- depthwiseconv2d_native_node, input_names);
+ auto tfdepthwiseconv2dnative_update =
+ std::make_unique<TFDepthwiseConv2dNativeGraphUpdate>(depthwiseconv2d_native_node, input_names);
updates->enroll(std::move(tfdepthwiseconv2dnative_update));
}
#include <plier/tf/Convert.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
using namespace plier::tf;
public:
TFFakeQuantWithMinMaxVarsGraphUpdate(TFFakeQuantWithMinMaxVars *node,
std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
// Record ifm inputs to featureEncode_node
auto tffakequant_update =
- stdex::make_unique<TFFakeQuantWithMinMaxVarsGraphUpdate>(fakequant_node, input_names);
+ std::make_unique<TFFakeQuantWithMinMaxVarsGraphUpdate>(fakequant_node, input_names);
updates->enroll(std::move(tffakequant_update));
}
#include <moco/IR/Nodes/TFFusedBatchNorm.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
{
public:
FusedBatchNormGraphUpdate(TFFusedBatchNorm *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
fbn_input_names.push_back(TensorName(node.input(3))); // mean
fbn_input_names.push_back(TensorName(node.input(4))); // variance
- auto tf_fbn_update = stdex::make_unique<FusedBatchNormGraphUpdate>(tf_fbn, fbn_input_names);
+ auto tf_fbn_update = std::make_unique<FusedBatchNormGraphUpdate>(tf_fbn, fbn_input_names);
updates->enroll(std::move(tf_fbn_update));
}
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <vector>
namespace
{
public:
TFIdentityGraphUpdate(TFIdentity *node, const std::vector<TensorName> &names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
{
names.emplace_back(TensorName(node.input(i)));
}
- auto update = stdex::make_unique<TFIdentityGraphUpdate>(identity_node, names);
+ auto update = std::make_unique<TFIdentityGraphUpdate>(identity_node, names);
updates->enroll(std::move(update));
}
#include <loco.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
{
public:
TFMaxPoolGraphUpdate(TFMaxPool *node, const TensorName &name)
- : _maxpool_node(node), _input_name(name)
+ : _maxpool_node(node), _input_name(name)
{
}
tensor_names->enroll(output_name, maxPool_node);
// Record ifm inputs to featureEncode_node
- auto update = stdex::make_unique<TFMaxPoolGraphUpdate>(maxPool_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFMaxPoolGraphUpdate>(maxPool_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFMaximum.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
add_input_names.push_back(TensorName(node.input(0))); // x
add_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_maximum_update = stdex::make_unique<TFMaximumGraphUpdate>(tf_maximum, add_input_names);
+ auto tf_maximum_update = std::make_unique<TFMaximumGraphUpdate>(tf_maximum, add_input_names);
updates->enroll(std::move(tf_maximum_update));
}
#include <moco/IR/Nodes/TFMean.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
using namespace moco;
public:
MeanGraphUpdate(TFMean *node, const TensorName &&input_name,
const TensorName &&reduction_indices_name)
- : _node(node), _input_name(input_name), _reduction_indices_name(reduction_indices_name)
+ : _node(node), _input_name(input_name), _reduction_indices_name(reduction_indices_name)
{
// DO NOTHING
}
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_mean);
- auto update = stdex::make_unique<MeanGraphUpdate>(tf_mean, TensorName(node.input(0)),
- TensorName(node.input(1)));
+ auto update = std::make_unique<MeanGraphUpdate>(tf_mean, TensorName(node.input(0)),
+ TensorName(node.input(1)));
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFMul.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
add_input_names.push_back(TensorName(node.input(0))); // x
add_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_mul_update = stdex::make_unique<TFMulGraphUpdate>(tf_mul, add_input_names);
+ auto tf_mul_update = std::make_unique<TFMulGraphUpdate>(tf_mul, add_input_names);
updates->enroll(std::move(tf_mul_update));
}
#include <loco.h>
#include <loco/IR/NodeShape.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
#include <cassert>
namespace
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, pack_node);
- auto update = stdex::make_unique<TFPackGraphUpdate>(pack_node, input_names);
+ auto update = std::make_unique<TFPackGraphUpdate>(pack_node, input_names);
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFPad.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
add_input_names.push_back(TensorName(node.input(1))); // paddings
// Queue node input update
- auto tf_pad_update = stdex::make_unique<TFPadGraphUpdate>(tf_pad, add_input_names);
+ auto tf_pad_update = std::make_unique<TFPadGraphUpdate>(tf_pad, add_input_names);
updates->enroll(std::move(tf_pad_update));
}
#include <moco/IR/Nodes/TFRealDiv.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
div_input_names.push_back(TensorName(node.input(0))); // x
div_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_div_update = stdex::make_unique<TFRealDivGraphUpdate>(tf_div, div_input_names);
+ auto tf_div_update = std::make_unique<TFRealDivGraphUpdate>(tf_div, div_input_names);
updates->enroll(std::move(tf_div_update));
}
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
tensor_names->enroll(output_name, relu_node);
// Queue node input update
- auto update = stdex::make_unique<TFReluGraphUpdate>(relu_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFReluGraphUpdate>(relu_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFRelu6.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
tensor_names->enroll(output_name, relu_node);
// Queue node input update
- auto update = stdex::make_unique<TFRelu6GraphUpdate>(relu_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFRelu6GraphUpdate>(relu_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <moco/Names.h>
#include <plier/tf/Convert.h>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
input_names.push_back(TensorName(node.input(1))); // shape
// Queue node input update
- auto update = stdex::make_unique<ReshapeGraphUpdate>(reshape, input_names);
+ auto update = std::make_unique<ReshapeGraphUpdate>(reshape, input_names);
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFRsqrt.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
tensor_names->enroll(output_name, tf_rsqrt);
// Queue node input update
- auto tf_rsqrt_update =
- stdex::make_unique<TFRsqrtGraphUpdate>(tf_rsqrt, TensorName(node.input(0)));
+ auto tf_rsqrt_update = std::make_unique<TFRsqrtGraphUpdate>(tf_rsqrt, TensorName(node.input(0)));
updates->enroll(std::move(tf_rsqrt_update));
}
#include <moco/IR/Nodes/TFShape.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
using namespace moco;
{
public:
ShapeGraphUpdate(TFShape *node, const TensorName &&input_name)
- : _node(node), _input_name(input_name)
+ : _node(node), _input_name(input_name)
{
// DO NOTHING
}
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_shape);
- auto update = stdex::make_unique<ShapeGraphUpdate>(tf_shape, TensorName(node.input(0)));
+ auto update = std::make_unique<ShapeGraphUpdate>(tf_shape, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFSoftmax.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
using namespace moco;
/**
-* @brief GraphUpdate for Softmax node
-*/
+ * @brief GraphUpdate for Softmax node
+ */
class SoftmaxGraphUpdate final : public GraphUpdate
{
public:
SoftmaxGraphUpdate(TFSoftmax *node, const TensorName &&input_name)
- : _node(node), _input_name(input_name)
+ : _node(node), _input_name(input_name)
{
// DO NOTHING
}
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_softmax);
- auto update = stdex::make_unique<SoftmaxGraphUpdate>(tf_softmax, TensorName(node.input(0)));
+ auto update = std::make_unique<SoftmaxGraphUpdate>(tf_softmax, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <moco/IR/Nodes/TFSqrt.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
tensor_names->enroll(output_name, tf_sqrt);
// Queue node input update
- auto tf_sqrt_update = stdex::make_unique<TFSqrtGraphUpdate>(tf_sqrt, TensorName(node.input(0)));
+ auto tf_sqrt_update = std::make_unique<TFSqrtGraphUpdate>(tf_sqrt, TensorName(node.input(0)));
updates->enroll(std::move(tf_sqrt_update));
}
#include <moco/IR/Nodes/TFSquaredDifference.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
{
public:
TFSquaredDifferenceGraphUpdate(TFSquaredDifference *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
// Queue node input update
auto tf_sqrt_update =
- stdex::make_unique<TFSquaredDifferenceGraphUpdate>(tf_sqdiff, add_input_names);
+ std::make_unique<TFSquaredDifferenceGraphUpdate>(tf_sqdiff, add_input_names);
updates->enroll(std::move(tf_sqrt_update));
}
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
+
namespace
{
using namespace moco;
{
public:
SqueezeGraphUpdate(TFSqueeze *node, const TensorName &&input_name)
- : _node(node), _input_name(input_name)
+ : _node(node), _input_name(input_name)
{
// DO NOTHING
}
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_squeeze);
- auto update = stdex::make_unique<SqueezeGraphUpdate>(tf_squeeze, TensorName(node.input(0)));
+ auto update = std::make_unique<SqueezeGraphUpdate>(tf_squeeze, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
#include <loco.h>
#include <plier/tf/Convert.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
// Queue node input update
auto tf_stopgradient_update =
- stdex::make_unique<TFStopGradientGraphUpdate>(tf_stopgradient, TensorName(node.input(0)));
+ std::make_unique<TFStopGradientGraphUpdate>(tf_stopgradient, TensorName(node.input(0)));
updates->enroll(std::move(tf_stopgradient_update));
}
#include "Convert.h"
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
+
namespace
{
using namespace moco;
{
public:
TFStridedSliceGraphUpdate(TFStridedSlice *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
input_names.push_back(TensorName(node.input(2))); // end
input_names.push_back(TensorName(node.input(3))); // strides
- auto tfconv2d_update = stdex::make_unique<TFStridedSliceGraphUpdate>(stridedslice, input_names);
+ auto tfconv2d_update = std::make_unique<TFStridedSliceGraphUpdate>(stridedslice, input_names);
updates->enroll(std::move(tfconv2d_update));
}
#include <moco/IR/Nodes/TFSub.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
sub_input_names.push_back(TensorName(node.input(0))); // x
sub_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_sub_update = stdex::make_unique<TFSubGraphUpdate>(tf_sub, sub_input_names);
+ auto tf_sub_update = std::make_unique<TFSubGraphUpdate>(tf_sub, sub_input_names);
updates->enroll(std::move(tf_sub_update));
}
#include <moco/IR/Nodes/TFTanh.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
tensor_names->enroll(output_name, tf_tanh);
// Queue node input update
- auto tf_tanh_update = stdex::make_unique<TFTanhGraphUpdate>(tf_tanh, TensorName(node.input(0)));
+ auto tf_tanh_update = std::make_unique<TFTanhGraphUpdate>(tf_tanh, TensorName(node.input(0)));
updates->enroll(std::move(tf_tanh_update));
}
#include "TestHelper.h"
#include <moco/IR/Nodes/TFConst.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
TFNodeBuildTester::TFNodeBuildTester()
{
_graph = loco::make_graph();
- _tensor_names = stdex::make_unique<moco::SymbolTable>();
+ _tensor_names = std::make_unique<moco::SymbolTable>();
}
void TFNodeBuildTester::inputs(const std::vector<std::string> &names)
{
assert(_output != nullptr);
- auto node_defs = stdex::make_unique<moco::NodeDefTable>();
- auto updates = stdex::make_unique<moco::UpdateQueue>();
+ auto node_defs = std::make_unique<moco::NodeDefTable>();
+ auto updates = std::make_unique<moco::UpdateQueue>();
moco::GraphBuilderContext gb_context(_graph.get(), node_defs.get(), _tensor_names.get(),
updates.get());
target_include_directories(moco_lang PUBLIC include)
target_link_libraries(moco_lang PUBLIC loco)
target_link_libraries(moco_lang PRIVATE nncc_common)
-target_link_libraries(moco_lang PRIVATE stdex)
install(TARGETS moco_lang DESTINATION lib) # moco_tf_frontend requires moco_lang
if(NOT ENABLE_TEST)
* Note that this convention is against loco canonical's convention.
*/
class TFConv2DBackpropInput final
- : public FixedArityNode<3, TFNodeImpl<TFOpcode::Conv2DBackpropInput>>
+ : public FixedArityNode<3, TFNodeImpl<TFOpcode::Conv2DBackpropInput>>
{
public:
loco::Node *input_sizes(void) const { return at(0)->node(); }
{
class TFDepthwiseConv2dNative final
- : public FixedArityNode<2, TFNodeImpl<TFOpcode::DepthwiseConv2dNative>>
+ : public FixedArityNode<2, TFNodeImpl<TFOpcode::DepthwiseConv2dNative>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
{
class TFFakeQuantWithMinMaxVars final
- : public FixedArityNode<3, TFNodeImpl<TFOpcode::FakeQuantWithMinMaxVars>>
+ : public FixedArityNode<3, TFNodeImpl<TFOpcode::FakeQuantWithMinMaxVars>>
{
public:
loco::Node *inputs(void) const { return at(0)->node(); }
#include <loco/IR/GraphInputIndex.h>
#include <loco/IR/GraphOutputIndex.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <stdexcept>
TFDialect::TFDialect()
{
- service<loco::GraphInputIndexQueryService>(stdex::make_unique<GiiQueryServiceImpl>());
- service<loco::GraphOutputIndexQueryService>(stdex::make_unique<GoiQueryServiceImpl>());
+ service<loco::GraphInputIndexQueryService>(std::make_unique<GiiQueryServiceImpl>());
+ service<loco::GraphOutputIndexQueryService>(std::make_unique<GoiQueryServiceImpl>());
}
loco::Dialect *TFDialect::get(void)
#include "moco/IR/TFNode.h"
#include "moco/IR/TFDialect.h"
+#include <memory>
#include <cassert>
namespace moco
} // namespace moco
-// TODO move this to appropriate place
-#include <stdex/Memory.h>
-
namespace moco
{
void index(TFPlaceholder *node, const loco::GraphInputIndex index)
{
- node->annot(stdex::make_unique<GraphInputIndexAnnotation>(index));
+ node->annot(std::make_unique<GraphInputIndexAnnotation>(index));
}
loco::TensorShape tensor_shape(const TFPlaceholder *node)
target_link_libraries(moco_pass PUBLIC logo_core)
target_link_libraries(moco_pass PUBLIC moco_lang)
target_link_libraries(moco_pass PRIVATE moco_support)
-target_link_libraries(moco_pass PRIVATE stdex)
target_link_libraries(moco_pass PRIVATE oops)
install(TARGETS moco_pass DESTINATION lib)
target_include_directories(moco_pass_test PRIVATE src)
target_link_libraries(moco_pass_test moco_pass)
target_link_libraries(moco_pass_test moco_support)
-target_link_libraries(moco_pass_test stdex)
/**
* @brief Constant folder for Const + Mul -> Const
-*/
+ */
class ConstantFoldMul : public logo::Pass
{
public:
/**
* @brief Constant folder for Const + Pack -> Const
-*/
+ */
class ConstantFoldPack : public logo::Pass
{
public:
/**
* @brief Constant folder for Const + StridedSlice -> Const
-*/
+ */
class ConstantFoldStridedSlice : public logo::Pass
{
public:
/**
* @brief Fuse TFAdd, TFMul to preceding TFConv2D or TFDepthWiseConv2D
-*/
+ */
class FuseBinaryIntoPreceding : public logo::Pass
{
public:
/**
* @brief Transform TFFusedBatchNorm into TFAdd + TFRsqrt + TFMul + TFBatchNorm
-*/
+ */
class ResolveFusedBatchNorm : public logo::Pass
{
public:
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
}
setup_output_node(&graph, add_node);
- auto pass = stdex::make_unique<moco::ConstantFoldAdd>();
+ auto pass = std::make_unique<moco::ConstantFoldAdd>();
bool cont = true;
while (cont)
{
}
setup_output_node(&graph, add_node);
- auto pass = stdex::make_unique<moco::ConstantFoldAdd>();
+ auto pass = std::make_unique<moco::ConstantFoldAdd>();
bool cont = true;
while (cont)
{
for (uint32_t e = 0; e < nume; e++)
{
output->at<loco::DataType::S32>(e) =
- f.apply(lhs->at<loco::DataType::S32>(e), rhs->at<loco::DataType::S32>(e));
+ f.apply(lhs->at<loco::DataType::S32>(e), rhs->at<loco::DataType::S32>(e));
}
}
for (uint32_t e = 0; e < nume; e++)
{
output->at<loco::DataType::FLOAT32>(e) =
- f.apply(lhs->at<loco::DataType::FLOAT32>(e), rhs->at<loco::DataType::FLOAT32>(e));
+ f.apply(lhs->at<loco::DataType::FLOAT32>(e), rhs->at<loco::DataType::FLOAT32>(e));
}
}
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
}
setup_output_node(&graph, mul_node);
- auto pass = stdex::make_unique<moco::ConstantFoldMul>();
+ auto pass = std::make_unique<moco::ConstantFoldMul>();
bool cont = true;
while (cont)
{
}
setup_output_node(&graph, mul_node);
- auto pass = stdex::make_unique<moco::ConstantFoldMul>();
+ auto pass = std::make_unique<moco::ConstantFoldMul>();
bool cont = true;
while (cont)
{
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
identity->input(pack_node);
setup_output_node(&graph, identity);
- auto pass = stdex::make_unique<moco::ConstantFoldPack>();
+ auto pass = std::make_unique<moco::ConstantFoldPack>();
bool cont = true;
while (cont)
{
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
fused_node = fused_conv_node<FuseType::Conv2D, moco::TFConv2D>(graph, mulparam, conv2d);
else if (auto dw_conv2d = dynamic_cast<moco::TFDepthwiseConv2dNative *>(precedingOp))
fused_node = fused_conv_node<FuseType::DepthwiseConv2D, moco::TFDepthwiseConv2dNative>(
- graph, mulparam, dw_conv2d);
+ graph, mulparam, dw_conv2d);
// Not ready yet
if (fused_node == nullptr)
}
}
{
- // TODO support Div
+ // TODO support Div
}
{
#include <loco/IR/NodeShape.h>
#include <loco/Service/ShapeInference.h>
-#include <stdex/Memory.h>
-
namespace
{
require("loco")
require("locop")
-require("stdex")
require("moco-log")
require("plier-tf")
require("mio-tf")
target_link_libraries(moco_service PUBLIC moco_lang)
target_link_libraries(moco_service PRIVATE moco_support)
target_link_libraries(moco_service PRIVATE nncc_common)
-target_link_libraries(moco_service PRIVATE stdex)
target_link_libraries(moco_service PRIVATE oops)
install(TARGETS moco_service DESTINATION lib)
// output count is from input count, depth is from kernel 'CM' which is dim(2) * dim(3)
auto output_feature_shape = input_feature_shape;
output_feature_shape.depth() =
- loco::Dimension(ker_tensor_shape.dim(2).value() * ker_tensor_shape.dim(3).value());
+ loco::Dimension(ker_tensor_shape.dim(2).value() * ker_tensor_shape.dim(3).value());
auto output_plane_shape = infer_plane_shape(input_plane_shape);
if (_padding == "VALID")
{
res.height =
- (p.input.height.value() + p.stride.height.value() - p.effective_window.height.value()) /
- p.stride.height.value();
+ (p.input.height.value() + p.stride.height.value() - p.effective_window.height.value()) /
+ p.stride.height.value();
res.width =
- (p.input.width.value() + p.stride.width.value() - p.effective_window.width.value()) /
- p.stride.width.value();
+ (p.input.width.value() + p.stride.width.value() - p.effective_window.width.value()) /
+ p.stride.width.value();
}
else if (_padding == "SAME")
{
{
public:
AddNode(const std::shared_ptr<expr::Node> &lhs, const std::shared_ptr<expr::Node> &rhs)
- : _lhs{lhs}, _rhs{rhs}
+ : _lhs{lhs}, _rhs{rhs}
{
// DO NOTHING
}
public:
template <typename... Args>
DerefNode(const DomainID &id, Args &&... indicies)
- : _id{id}, _sub{std::forward<Args>(indicies)...}
+ : _id{id}, _sub{std::forward<Args>(indicies)...}
{
// DO NOTHING
}
{
public:
MulNode(const std::shared_ptr<expr::Node> &lhs, const std::shared_ptr<expr::Node> &rhs)
- : _lhs{lhs}, _rhs{rhs}
+ : _lhs{lhs}, _rhs{rhs}
{
// DO NOTHING
}
{
// Dummy Node for testing
};
-}
+} // namespace
TEST(BLOCK, use_case_1)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(Closure, ctor)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(EXPR, operator_sum)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(RET, ctor)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(ADD_NODE, cast)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(DEREF_NODE, cast)
{
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(MUL_NODE, cast)
{
struct DummyExprNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(STMT_PUSH_NODE, cast)
{
namespace fs = boost::filesystem;
AclCppCodeGenerator::AclCppCodeGenerator(string output_dir, string artifact_name)
- : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
+ : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
{
}
using namespace mir;
AclCppOpGenerator::AclCppOpGenerator(const string &name, ostream &par_out)
- : _parOut(par_out), _module(name), _constrBlock(nullptr), _infBlock(nullptr),
- _clScheduler(AF::id("arm_compute::CLScheduler"))
+ : _parOut(par_out), _module(name), _constrBlock(nullptr), _infBlock(nullptr),
+ _clScheduler(AF::id("arm_compute::CLScheduler"))
{
}
_parInVar = _artifactClass->var(false, "std::ifstream", "_parIn");
_parIn = _parInVar->use();
string par_file_name = _module.name() + ".par";
- _constrBlock->call("open", {AF::lit("\"" + par_file_name + "\""),
- AF::lit("std::ios_base::in | std::ios_base::binary")},
- _parIn);
+ _constrBlock->call(
+ "open",
+ {AF::lit("\"" + par_file_name + "\""), AF::lit("std::ios_base::in | std::ios_base::binary")},
+ _parIn);
auto file_fail = _constrBlock->ifCond(AF::call("fail", {}, _parIn));
auto file_fail_block = file_fail->getBlock();
file_fail_block->addStatement(
- AF::lit("throw std::string(\"Failed to open file: " + par_file_name + " for reading\")"));
+ AF::lit("throw std::string(\"Failed to open file: " + par_file_name + " for reading\")"));
// Traverse the computational graph.
g->accept(this);
const auto *ir_output = op.getOutput(0);
static const char *axis_names[] = {
- "arm_compute::DataLayoutDimension::BATCHES", "arm_compute::DataLayoutDimension::CHANNEL",
- "arm_compute::DataLayoutDimension::HEIGHT", "arm_compute::DataLayoutDimension::WIDTH"};
+ "arm_compute::DataLayoutDimension::BATCHES", "arm_compute::DataLayoutDimension::CHANNEL",
+ "arm_compute::DataLayoutDimension::HEIGHT", "arm_compute::DataLayoutDimension::WIDTH"};
int axis = op.getAxis();
assert(axis >= 0 && axis < static_cast<int>(sizeof(axis_names) / sizeof(axis_names[0])) &&
for (const Operation::Output *ir_input : ir_inputs)
_constrBlock->call("push_back", {AF::ref(AF::id(tensorName(ir_input)))}, inputs);
- auto layer = genLayer("arm_compute::CLConcatenateLayer", prefix,
- {inputs, AF::ref(out), AF::lit(axis_name)});
+ auto layer =
+ genLayer("arm_compute::CLConcatenateLayer", prefix, {inputs, AF::ref(out), AF::lit(axis_name)});
addToPersistentTensors(out);
genLayerExecution(layer);
string var_name = prefix + "_pad_stride_info";
list<std::shared_ptr<ArtifactExpr>> var_init_params = {
- AF::lit(to_string(strides.dim(1))),
- AF::lit(to_string(strides.dim(0))),
- AF::lit(to_string(padding_before.at(1))),
- AF::lit(to_string(padding_after.at(1))),
- AF::lit(to_string(padding_before.at(0))),
- AF::lit(to_string(padding_after.at(0))),
- AF::lit("arm_compute::DimensionRoundingType::FLOOR")};
+ AF::lit(to_string(strides.dim(1))),
+ AF::lit(to_string(strides.dim(0))),
+ AF::lit(to_string(padding_before.at(1))),
+ AF::lit(to_string(padding_after.at(1))),
+ AF::lit(to_string(padding_before.at(0))),
+ AF::lit(to_string(padding_after.at(0))),
+ AF::lit("arm_compute::DimensionRoundingType::FLOOR")};
auto pad_stride_info_var = block->var(type_name, var_name, {}, var_init_params);
// themselves,
// so we don't serialize them here, also we don't serialize tensors from dangling ConstantOp
static std::map<Operation::Type, std::size_t> self_serializing_ops_to_inputs{
- {Operation::Type::conv2D, 1}, {Operation::Type::fullyConnected, 1}};
+ {Operation::Type::conv2D, 1}, {Operation::Type::fullyConnected, 1}};
for (Operation::Use use : op.getOutput(0)->getUses())
{
for (int i = 0; i < ir_input->getShape().rank(); ++i)
{
auto pad_var = _constrBlock->var(
- "arm_compute::PaddingInfo", prefix + "_pad_" + to_string(i), {},
- {AF::lit(to_string(padding_before[i])), AF::lit(to_string(padding_after[i]))});
+ "arm_compute::PaddingInfo", prefix + "_pad_" + to_string(i), {},
+ {AF::lit(to_string(padding_before[i])), AF::lit(to_string(padding_after[i]))});
auto pad = pad_var->use();
_constrBlock->call("push_back", {pad}, pad_list);
}
// FIXME Set up the `constant_value` parameter.
assert(op.getPaddingValue() == 0.0f);
auto layer =
- genLayer("arm_compute::CLPadLayer", prefix, {AF::ref(input), AF::ref(out), pad_list});
+ genLayer("arm_compute::CLPadLayer", prefix, {AF::ref(input), AF::ref(out), pad_list});
genLayerExecution(layer);
}
// Transpose data from MIR format to a format compatible with ACL
const string transposed_input_name = output_tensor_name + "transposed_input";
shared_ptr<ArtifactId> transposed_input =
- genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), in_id);
+ genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), in_id);
const string layer_name = output_tensor_name + "_pooling_layer";
// Create kernel window info
shared_ptr<ArtifactVariable> kernel_window_var = _constrBlock->var(
- "arm_compute::Size2D", layer_name + "_kernel_window", {},
- {AF::lit(to_string(op.getWindowSize()[1])), AF::lit(to_string(op.getWindowSize()[0]))});
+ "arm_compute::Size2D", layer_name + "_kernel_window", {},
+ {AF::lit(to_string(op.getWindowSize()[1])), AF::lit(to_string(op.getWindowSize()[0]))});
shared_ptr<ArtifactId> kernel_window = kernel_window_var->use();
// Create pooling info: pooling type, kernel info, strides, etc
shared_ptr<ArtifactVariable> pooling_info_var =
- _constrBlock->var("arm_compute::PoolingLayerInfo", layer_name + "_pooling_info", {},
- {AF::lit(pooling_type), kernel_window, pad_stride_info,
- AF::lit(exclude_padding ? "true" : "false")});
+ _constrBlock->var("arm_compute::PoolingLayerInfo", layer_name + "_pooling_info", {},
+ {AF::lit(pooling_type), kernel_window, pad_stride_info,
+ AF::lit(exclude_padding ? "true" : "false")});
shared_ptr<ArtifactId> pooling_info = pooling_info_var->use();
// Generate auxiliary tensor to hold transposed output of pool in NCHW format
Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output->getShape());
shared_ptr<ArtifactId> transposed_output =
- genTensor(layer_name + "_out_transpose", transposed_output_shape);
+ genTensor(layer_name + "_out_transpose", transposed_output_shape);
// Actual layer creation
shared_ptr<ArtifactId> layer =
- genLayer("arm_compute::CLPoolingLayer", layer_name,
- {AF::ref(transposed_input), AF::ref(transposed_output), pooling_info});
+ genLayer("arm_compute::CLPoolingLayer", layer_name,
+ {AF::ref(transposed_input), AF::ref(transposed_output), pooling_info});
genTensorAllocation(_infBlock, transposed_output);
genLayerExecution(layer);
shared_ptr<ArtifactId> output =
- genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
genTensorDeallocation(_infBlock, transposed_input);
genTensorDeallocation(_infBlock, transposed_output);
// Generate auxiliary tensor to hold transposed input of convolution in NCHW format
shared_ptr<ArtifactId> transposed_input =
- genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input->getShape(), input);
+ genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input->getShape(), input);
// Create the transposed output tensor in the DOM.
const string transposed_output_name = output_tensor_name + "_transposed_output";
Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output->getShape());
shared_ptr<ArtifactId> transposed_output =
- genTensor(transposed_output_name, transposed_output_shape);
+ genTensor(transposed_output_name, transposed_output_shape);
string operation_name = output_tensor_name + suffix;
// Generate auxiliary tensor to hold transposed output of convolution in NHWC format
shared_ptr<ArtifactId> output =
- genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
genTensorDeallocation(_infBlock, transposed_input);
genTensorDeallocation(_infBlock, transposed_output);
// constructor. This instance provides information about the concrete activation function,
// like ReLU, Tanh, etc., and two optional parameters (alpha and beta) needed by some activations.
auto activation_info_var = _constrBlock->var(
- "arm_compute::ActivationLayerInfo", prefix + "_activation_info", {},
- {AF::lit("arm_compute::ActivationLayerInfo::ActivationFunction::" + activation_name),
- AF::lit(to_string(a)), AF::lit(to_string(b))});
+ "arm_compute::ActivationLayerInfo", prefix + "_activation_info", {},
+ {AF::lit("arm_compute::ActivationLayerInfo::ActivationFunction::" + activation_name),
+ AF::lit(to_string(a)), AF::lit(to_string(b))});
auto activation_info = activation_info_var->use();
// Create an instance of the CLActivationLayer class as a member of the artifact class.
auto arithmetic_add_layer = arithmetic_add_layer_var->use();
// Generate the call: arithmetic_add_layer.configure(&in1, &in2, &out);
- _constrBlock->call("configure", {AF::ref(in1), AF::ref(in2), AF::ref(out),
- AF::lit("arm_compute::ConvertPolicy::WRAP")},
- arithmetic_add_layer);
+ _constrBlock->call(
+ "configure",
+ {AF::ref(in1), AF::ref(in2), AF::ref(out), AF::lit("arm_compute::ConvertPolicy::WRAP")},
+ arithmetic_add_layer);
// Generate the call: arithmetic_add_layer.run();
_infBlock->call("run", {}, arithmetic_add_layer);
if (!tensor_name.empty())
{
tensor_name = "_" + tensor_name;
- replace_if(tensor_name.begin(), tensor_name.end(), [](char c) { return std::isalnum(c) == 0; },
- '_');
+ replace_if(
+ tensor_name.begin(), tensor_name.end(), [](char c) { return std::isalnum(c) == 0; }, '_');
}
else
{
const char *type_name = "arm_compute::TensorShape";
shared_ptr<ArtifactId> shape =
- genVectorInitializedVar(_constrBlock, type_name, name + "_shape", shape_vectorized);
+ genVectorInitializedVar(_constrBlock, type_name, name + "_shape", shape_vectorized);
_constrBlock->call("initializeTensor", {id, shape});
if (gen_accessor)
// Create operation parameter containing permutation vector
shared_ptr<ArtifactId> perm_vector = genVectorInitializedVar(
- _constrBlock, "arm_compute::PermutationVector", out_name + "_perm_param", acl_perm);
+ _constrBlock, "arm_compute::PermutationVector", out_name + "_perm_param", acl_perm);
// Instantiate the CLPermute object.
string layer_name = out_name + "_transpose_layer";
ArtifactFunctionCall::ArtifactFunctionCall(string func_name,
list<shared_ptr<ArtifactExpr>> param_list,
shared_ptr<ArtifactExpr> on, ArtifactCallType call_type)
- : _funcName(std::move(func_name)), _callType(call_type), _on(std::move(on)),
- _paramList(std::move(param_list))
+ : _funcName(std::move(func_name)), _callType(call_type), _on(std::move(on)),
+ _paramList(std::move(param_list))
{
}
{
public:
ArtifactUnaryExpr(ArtifactUnOp op, std::shared_ptr<ArtifactExpr> expr)
- : _op(op), _expr(std::move(expr))
+ : _op(op), _expr(std::move(expr))
{
}
public:
ArtifactBinaryExpr(ArtifactBinOp op, std::shared_ptr<ArtifactExpr> left,
std::shared_ptr<ArtifactExpr> right)
- : _op(op), _left(std::move(left)), _right(std::move(right))
+ : _op(op), _left(std::move(left)), _right(std::move(right))
{
}
{
public:
ArtifactIndex(std::shared_ptr<ArtifactExpr> expr, std::shared_ptr<ArtifactExpr> ind)
- : _expr(std::move(expr)), _ind(std::move(ind))
+ : _expr(std::move(expr)), _ind(std::move(ind))
{
}
ArtifactVariable(std::string type_name, std::string var_name,
std::list<std::shared_ptr<ArtifactExpr>> dimensions = {},
std::list<std::shared_ptr<ArtifactExpr>> initializers = {})
- : _typeName(std::move(type_name)), _dimensions(std::move(dimensions)),
- _initializers(std::move(initializers)), ArtifactNamed(std::move(var_name))
+ : _typeName(std::move(type_name)), _dimensions(std::move(dimensions)),
+ _initializers(std::move(initializers)), ArtifactNamed(std::move(var_name))
{
}
explicit ArtifactForLoop(std::shared_ptr<ArtifactVariable> init = nullptr,
std::shared_ptr<ArtifactExpr> cond = nullptr,
std::shared_ptr<ArtifactExpr> iter = nullptr)
- : _init(std::move(init)), _cond(std::move(cond)), _iter(std::move(iter))
+ : _init(std::move(init)), _cond(std::move(cond)), _iter(std::move(iter))
{
}
*/
ArtifactFunction(std::string ret_type_name, const std::string &func_name,
std::list<std::shared_ptr<ArtifactVariable>> params = {})
- : ArtifactNamed(func_name), _params(std::move(params)), _retTypeName(std::move(ret_type_name))
+ : ArtifactNamed(func_name), _params(std::move(params)), _retTypeName(std::move(ret_type_name))
{
}
const std::string &var_name,
const std::list<std::shared_ptr<ArtifactExpr>> &dimensions = {},
const std::list<std::shared_ptr<ArtifactExpr>> &initializers = {})
- : ArtifactClassMember(owner), ArtifactVariable(type_name, var_name, dimensions, initializers)
+ : ArtifactClassMember(owner), ArtifactVariable(type_name, var_name, dimensions, initializers)
{
}
ArtifactClassFunction(const ArtifactClass *owner, const std::string &ret_type_name,
const std::string &func_name,
const std::list<std::shared_ptr<ArtifactVariable>> &params = {})
- : ArtifactClassMember(owner), ArtifactFunction(ret_type_name, func_name, params)
+ : ArtifactClassMember(owner), ArtifactFunction(ret_type_name, func_name, params)
{
}
static TensorVariant readTensorFromFile(const std::string &filename, const TensorType &type)
{
const std::size_t input_data_size =
- type.getShape().numElements() * getDataTypeSize(type.getElementType());
+ type.getShape().numElements() * getDataTypeSize(type.getElementType());
std::ifstream stream(filename, std::ios::in | std::ios::binary);
if (stream.fail())
int64_t file_size = end - begin;
if (static_cast<std::size_t>(file_size) != input_data_size)
- throw std::runtime_error("File \"" + filename + "\" has incorrect size: " +
- std::to_string(file_size) + "(expected: " +
- std::to_string(input_data_size) + ").");
+ throw std::runtime_error("File \"" + filename +
+ "\" has incorrect size: " + std::to_string(file_size) +
+ "(expected: " + std::to_string(input_data_size) + ").");
std::unique_ptr<char[]> data(new char[input_data_size]);
stream.read(data.get(), input_data_size);
}
InterpreterBackend::InterpreterBackend(std::string input_dir, std::string output_dir)
- : _input_dir(std::move(input_dir)), _output_dir(std::move(output_dir))
+ : _input_dir(std::move(input_dir)), _output_dir(std::move(output_dir))
{
}
}
CPPCodeGenerator::CPPCodeGenerator(std::string output_dir, std::string artifact_name)
- : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
+ : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
{
}
string class_name = ma.getModelName() + "Model";
out.write(cpp_header_types, sizeof(cpp_header_types));
- out << "class " << class_name << "\n"
- "{\n"
- "public:\n"
- " "
- << class_name << "(const std::string& parametersPath);\n"
- " ~"
+ out << "class " << class_name
+ << "\n"
+ "{\n"
+ "public:\n"
+ " "
+ << class_name
+ << "(const std::string& parametersPath);\n"
+ " ~"
<< class_name << "();\n";
// generate input setters
if (ma.getInputs().size() == 1)
out << " void doInference();\n\n"
"private:\n"
" "
- << class_name << "() = delete;\n"
- " "
- << class_name << "(const " << class_name << "& orig) = delete;\n"
- " "
+ << class_name
+ << "() = delete;\n"
+ " "
+ << class_name << "(const " << class_name
+ << "& orig) = delete;\n"
+ " "
<< class_name << "& operator=(const " << class_name << "& orig) = delete;\n";
// generate input/output tensors
for (const size_t in_tensor_id : ma.getInputs())
{
const string &var_name = _formattedTensors[td.id];
- out << "bool " << class_name << "::set" << setter_name << "(const Tensor& t)\n"
- "{\n";
+ out << "bool " << class_name << "::set" << setter_name
+ << "(const Tensor& t)\n"
+ "{\n";
// need to insert input correctness check
const mir::Shape expected = td.shape;
int rank = expected.rank();
out << " "
<< "if (t.getShape()[" << i << "] != " << expected.dim(i) << ") return false;\n";
}
- out << " " << var_name << " = t;\n"
- " return true;\n"
- "}\n\n";
+ out << " " << var_name
+ << " = t;\n"
+ " return true;\n"
+ "}\n\n";
}
void CPPCodeGenerator::printGetter(ostream &out, const string &class_name,
{
const string &var_name = _formattedTensors[td.id];
- out << "shared_ptr<Tensor> " << class_name << "::get" << getter_name << "()\n"
- "{\n"
- " return "
- << var_name << ";\n"
- "}\n\n";
+ out << "shared_ptr<Tensor> " << class_name << "::get" << getter_name
+ << "()\n"
+ "{\n"
+ " return "
+ << var_name
+ << ";\n"
+ "}\n\n";
}
void CPPCodeGenerator::materializeCall(ostream &out, const ModelAnalyzer &ma,
<< "(const string& parametersPath)\n"
"{\n"
" readParameters(_parameters, _paramSize, parametersPath, "
- << s.getFormatVersion() << ", " << s.getModelHash() << ");\n"
- "}\n\n";
+ << s.getFormatVersion() << ", " << s.getModelHash()
+ << ");\n"
+ "}\n\n";
// gen NN destructor
- out << class_name << "::~" << class_name << "()\n"
- "{\n"
- " releaseParameters(_parameters, _paramSize);\n"
- "}\n\n";
+ out << class_name << "::~" << class_name
+ << "()\n"
+ "{\n"
+ " releaseParameters(_parameters, _paramSize);\n"
+ "}\n\n";
// generate input setters
// generate main setter if network has only one
const auto &inputs = ma.getInputs();
const TensorDescriptor &td = tensors[output_tensor_id];
printGetter(out, class_name, output_tensor_name, td);
}
- out << "void " << class_name << "::doInference()\n"
- "{\n";
+ out << "void " << class_name
+ << "::doInference()\n"
+ "{\n";
for (size_t output_tensor_id : ma.getPersistentTensors())
{
const string &output_tensor_name = _formattedTensors[output_tensor_id];
{
const auto &tensor_name = output.getName();
const auto tensor_id =
- tensor_name.empty() ? declareTemporaryTensor() : declarePersistentTensor(tensor_name);
+ tensor_name.empty() ? declareTemporaryTensor() : declarePersistentTensor(tensor_name);
node_output_tensors.push_back(tensor_id);
}
}
std::copy(aux_args.begin(), aux_args.end(), std::back_inserter(node_input_tensors));
unique_ptr<Action> operation_call(new CallFunction(
- op, function_name, std::move(node_input_tensors), std::move(node_output_tensors)));
+ op, function_name, std::move(node_input_tensors), std::move(node_output_tensors)));
_inferenceSequence.push_back(std::move(operation_call));
_opToDescr[op] = _inferenceSequence.back().get();
}
{
public:
/**
- * @brief contructs inference sequence
- * @param g pointer to graph to linearize
- */
+   * @brief constructs inference sequence
+ * @param g pointer to graph to linearize
+ */
void analyze(const mir::Graph *g);
void visit(mir::ops::AbsOp &) override;
{
TransposeTensor(size_t input, size_t output, std::vector<int32_t> &&perm)
- : Action(Type::transposeTensor), perm(std::move(perm)), input(input), output(output)
+ : Action(Type::transposeTensor), perm(std::move(perm)), input(input), output(output)
{
}
CallFunction(mir::Operation *op, std::string func_name, std::vector<size_t> &&inputs,
std::vector<size_t> &&outputs)
- : Action(Type::callFunction), mirOp(op), funcName(std::move(func_name)), inputs(inputs),
- outputs(outputs), paramStartOffset(0)
+ : Action(Type::callFunction), mirOp(op), funcName(std::move(func_name)), inputs(inputs),
+ outputs(outputs), paramStartOffset(0)
{
}
#else
showopt(false)
#endif // NNC_FRONTEND_CAFFE_ENABLED
- );
+);
Option<bool> onnxFrontend(optname("--onnx"), overview("treat input file as ONNX model"), false,
optional(true), optvalues(""), nullptr, separators(""),
#ifdef NNC_FRONTEND_ONNX_ENABLED
#else
showopt(false)
#endif // NNC_FRONTEND_ONNX_ENABLED
- );
+);
Option<bool> caffe2Frontend(optname("--caffe2"),
overview("treat input file as Caffe2 model (predict_net.pb)"), false,
#else
showopt(false)
#endif // NNC_FRONTEND_TFLITE_ENABLED
- );
+);
Option<std::string>
- target(optname("--target"),
- overview("select target language to emit for given architecture."
- "Valid values are '" NNC_TARGET_ARM_CPP "', '" NNC_TARGET_X86_CPP
- "', '" NNC_TARGET_ARM_GPU_CPP "', '" NNC_TARGET_INTERPRETER "'"),
- std::string(), optional(false),
- optvalues(NNC_TARGET_ARM_CPP "," NNC_TARGET_X86_CPP "," NNC_TARGET_ARM_GPU_CPP
- "," NNC_TARGET_INTERPRETER),
- nullptr, separators("="));
+ target(optname("--target"),
+ overview("select target language to emit for given architecture."
+ "Valid values are '" NNC_TARGET_ARM_CPP "', '" NNC_TARGET_X86_CPP
+ "', '" NNC_TARGET_ARM_GPU_CPP "', '" NNC_TARGET_INTERPRETER "'"),
+ std::string(), optional(false),
+ optvalues(NNC_TARGET_ARM_CPP "," NNC_TARGET_X86_CPP "," NNC_TARGET_ARM_GPU_CPP
+ "," NNC_TARGET_INTERPRETER),
+ nullptr, separators("="));
/**
* Options for *frontend*
{
public:
/* implicit */ PassData(std::nullptr_t data)
- : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
- _dataContainer{.unknown = data},
- _dataType(PDT::UNKNOWN)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.unknown = data}, _dataType(PDT::UNKNOWN)
{
}
* @brief Implicit conversion from Graph* to PassData
*/
/* implicit */ PassData(mir::Graph *graph)
- : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
- _dataContainer{.graph = graph},
- _dataType(PDT::GRAPH)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.graph = graph}, _dataType(PDT::GRAPH)
{
}
* @brief Implicit conversion from Graph* to PassData
*/
/* implicit */ PassData(mir::TensorVariant *tv)
- : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
- _dataContainer{.tensorVariant = tv},
- _dataType(PDT::TENSOR_VARIANT)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.tensorVariant = tv}, _dataType(PDT::TENSOR_VARIANT)
{
}
PassData run(PassData data) override;
std::string getName() override { return "opt_combine_transposes"; };
+
private:
};
namespace opt_util
{
/**
-* @brief Swap adjacent nodes in Graph. Creates new nodes and replaces the old ones with new.
-* @param g MIR Graph
-* @param top Node
-* @param bottom Node
-*/
+ * @brief Swap adjacent nodes in Graph. Creates new nodes and replaces the old ones with new.
+ * @param g MIR Graph
+ * @param top Node
+ * @param bottom Node
+ */
void swapAdjacent(mir::Graph *g, mir::Operation *top, mir::Operation *bottom);
// TODO: this function and its usages should be removed after DCE optimization is implemented
{
public:
explicit BadOption(const std::string &msg, std::string optname = "", std::string value = "")
- : std::logic_error(msg), _option_name(std::move(optname)), _option_value(std::move(value))
+ : std::logic_error(msg), _option_name(std::move(optname)), _option_value(std::move(value))
{
}
std::map<std::string, IOption *> _options_name; // map of name -> option
std::vector<IOption *> _options; // options
std::map<IOption::Group, std::vector<IOption *>>
- _grouped_options; // map of groups: group -> vector of options
+ _grouped_options; // map of groups: group -> vector of options
std::string _prog_name; // name of program
int _args_num = 0; // number of command line arguments
};
_group = group;
_can_have_several_vals =
- std::is_same<T, std::vector<std::string>>::value || std::is_same<T, std::vector<int>>::value;
+ std::is_same<T, std::vector<std::string>>::value || std::is_same<T, std::vector<int>>::value;
assert(!(_can_have_several_vals && !_seps.empty()) &&
"option with several values can't have separators");
};
auto *bottom_transpose = dynamic_cast<mir::ops::TransposeOp *>(match.second);
auto combined_axis_order =
- combineAxisOrders(top_transpose->getAxisOrder(), bottom_transpose->getAxisOrder());
+ combineAxisOrders(top_transpose->getAxisOrder(), bottom_transpose->getAxisOrder());
if (!isIdentityTranspose(combined_axis_order))
{
auto new_tr_op =
- g->create<mir::ops::TransposeOp>(top_transpose->getInput(0), combined_axis_order);
+ g->create<mir::ops::TransposeOp>(top_transpose->getInput(0), combined_axis_order);
g->replaceNode(bottom_transpose, new_tr_op);
}
return;
bool has_no_uses =
- std::all_of(op->getOutputs().cbegin(), op->getOutputs().cend(),
- [](const Operation::Output &output) { return output.getUses().empty(); });
+ std::all_of(op->getOutputs().cbegin(), op->getOutputs().cend(),
+ [](const Operation::Output &output) { return output.getUses().empty(); });
if (has_no_uses)
{
// Create new operations
auto old_add_input = old_add_op->getInput(0);
auto new_mul_op =
- g->copyOpWithInputs(old_mul_op, {old_add_input, ols_mul_const_op->getOutput(0)});
+ g->copyOpWithInputs(old_mul_op, {old_add_input, ols_mul_const_op->getOutput(0)});
auto new_add_const_op = mergeConstantOps(g, old_add_const_op, ols_mul_const_op, OpType::mul);
auto new_add_op =
- g->copyOpWithInputs(old_add_op, {new_mul_op->getOutput(0), new_add_const_op->getOutput(0)});
+ g->copyOpWithInputs(old_add_op, {new_mul_op->getOutput(0), new_add_const_op->getOutput(0)});
// Replace old mul with new add and remove old nodes
g->replaceNode(old_mul_op, new_add_op);
namespace nnc
{
DataFormatSwitcher::DataFormatSwitcher(const mir::DataFormat target_format)
- : _target_format(target_format)
+ : _target_format(target_format)
{
}
mir::Operation::Output *new_out;
if (_target_format == mir::DataFormat::NHWC)
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 2, 3, 1})
- ->getOutput(0); // NCHW -> NHWC
+ ->getOutput(0); // NCHW -> NHWC
else
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 3, 1, 2})
- ->getOutput(0); // NHWC -> NCHW
+ ->getOutput(0); // NHWC -> NCHW
if (out->getType().isQuantized())
new_out->setQuantization(out->getType().getQuantization());
return new_out;
mir::Operation::Output *new_out;
if (_target_format == mir::DataFormat::NHWC)
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 3, 1, 2})
- ->getOutput(0); // NHWC -> NCHW
+ ->getOutput(0); // NHWC -> NCHW
else
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 2, 3, 1})
- ->getOutput(0); // NCHW -> NHWC
+ ->getOutput(0); // NCHW -> NHWC
if (out->getType().isQuantized())
new_out->setQuantization(out->getType().getQuantization());
return new_out;
// [O, H, W, I / M] == [M, H, W, 1] -> [H, W, M, 1]
std::vector<std::size_t> perm{1, 2, 0, 3};
mir::Operation::Output *new_kernel =
- graph->create<mir::ops::TransposeOp>(kernel, perm)->getOutput(0);
+ graph->create<mir::ops::TransposeOp>(kernel, perm)->getOutput(0);
mir::Conv2DOpAttributes attributes = op->getAttributes();
attributes.num_groups = 1;
mir::Operation::Output *new_result =
- graph->create<mir::ops::DepthwiseConv2DOp>(input, new_kernel, attributes)->getOutput(0);
+ graph->create<mir::ops::DepthwiseConv2DOp>(input, new_kernel, attributes)->getOutput(0);
graph->replaceNode(op, new_result->getNode());
}
}
// Copy the model input HDF5 file to the remote device.
ASSERT_TRUE(
- copyToOdroid(binDir + "/" + name + "/in_" + name + "_caffe.hdf5", dir_name + "/in.hdf5"));
+ copyToOdroid(binDir + "/" + name + "/in_" + name + "_caffe.hdf5", dir_name + "/in.hdf5"));
// Switch to the artifact directory on the remote device and run the artifact.
ASSERT_TRUE(runOnOdroid("cd " + dir_name + "; ./nnc_test"));
Iterator i(&tensor, window);
char *ptr = &buf[0];
- execute_window_loop(window,
- [&i, &ptr](const Coordinates &) {
- memcpy(ptr, i.ptr(), sizeof(float));
- ptr += sizeof(float);
- },
- i);
+ execute_window_loop(
+ window,
+ [&i, &ptr](const Coordinates &) {
+ memcpy(ptr, i.ptr(), sizeof(float));
+ ptr += sizeof(float);
+ },
+ i);
tensor.unmap();
return buf;
Iterator i(&tensor, window);
char *ptr = &buf[0];
- execute_window_loop(window,
- [&i, &ptr](const Coordinates &) {
- memcpy(i.ptr(), ptr, sizeof(float));
- ptr += sizeof(float);
- },
- i);
+ execute_window_loop(
+ window,
+ [&i, &ptr](const Coordinates &) {
+ memcpy(i.ptr(), ptr, sizeof(float));
+ ptr += sizeof(float);
+ },
+ i);
tensor.unmap();
}
string target_compiler = "g++ -Wall --std=c++11";
string compiler_command =
- target_compiler + " -I" + output_dir + " " + main_path + " " + code_path;
+ target_compiler + " -I" + output_dir + " " + main_path + " " + code_path;
// call compiler
int res = system(compiler_command.c_str());
const char *var_name = "id";
shared_ptr<ArtifactId> var = AF::id(var_name);
pair<ArtifactUnOp, const char *> test_cases[] = {
- {ArtifactUnOp::preIncr, "++id"}, {ArtifactUnOp::preDecr, "--id"},
- {ArtifactUnOp::heapNew, "new id"}, {ArtifactUnOp::heapFree, "delete id"},
- {ArtifactUnOp::postIncr, "id++"}, {ArtifactUnOp::postDecr, "id--"}};
+ {ArtifactUnOp::preIncr, "++id"}, {ArtifactUnOp::preDecr, "--id"},
+ {ArtifactUnOp::heapNew, "new id"}, {ArtifactUnOp::heapFree, "delete id"},
+ {ArtifactUnOp::postIncr, "id++"}, {ArtifactUnOp::postDecr, "id--"}};
for (auto test : test_cases)
{
shared_ptr<ArtifactId> op2 = AF::id(op2_name);
pair<ArtifactBinOp, const char *> test_cases[] = {
- {ArtifactBinOp::eq, "a == b"}, {ArtifactBinOp::notEq, "a != b"},
- {ArtifactBinOp::less, "a < b"}, {ArtifactBinOp::lessOrEq, "a <= b"},
- {ArtifactBinOp::great, "a > b"}, {ArtifactBinOp::greatOrEq, "a >= b"},
- {ArtifactBinOp::assign, "a = b"}, {ArtifactBinOp::plus, "a + b"},
- {ArtifactBinOp::minus, "a - b"}, {ArtifactBinOp::mult, "a * b"},
- {ArtifactBinOp::div, "a / b"}, {ArtifactBinOp::plusAssign, "a += b"},
- {ArtifactBinOp::minusAssign, "a -= b"}, {ArtifactBinOp::multAssign, "a *= b"},
- {ArtifactBinOp::divAssign, "a /= b"}};
+ {ArtifactBinOp::eq, "a == b"}, {ArtifactBinOp::notEq, "a != b"},
+ {ArtifactBinOp::less, "a < b"}, {ArtifactBinOp::lessOrEq, "a <= b"},
+ {ArtifactBinOp::great, "a > b"}, {ArtifactBinOp::greatOrEq, "a >= b"},
+ {ArtifactBinOp::assign, "a = b"}, {ArtifactBinOp::plus, "a + b"},
+ {ArtifactBinOp::minus, "a - b"}, {ArtifactBinOp::mult, "a * b"},
+ {ArtifactBinOp::div, "a / b"}, {ArtifactBinOp::plusAssign, "a += b"},
+ {ArtifactBinOp::minusAssign, "a -= b"}, {ArtifactBinOp::multAssign, "a *= b"},
+ {ArtifactBinOp::divAssign, "a /= b"}};
for (auto test : test_cases)
{
shared_ptr<ArtifactVariable> iter = AF::var(var_type, var_name, {}, {AF::lit("0")});
shared_ptr<ArtifactExpr> step =
- AF::bin(ArtifactBinOp::plusAssign, AF::id(var_name), AF::lit("1"));
+ AF::bin(ArtifactBinOp::plusAssign, AF::id(var_name), AF::lit("1"));
shared_ptr<ArtifactExpr> cond =
- AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
+ AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
shared_ptr<ArtifactBinaryExpr> expr =
- AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
+ AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
ArtifactForLoop loop(iter, cond, step);
const char *var_name = "i";
shared_ptr<ArtifactExpr> cond =
- AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
+ AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
shared_ptr<ArtifactBinaryExpr> expr =
- AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
+ AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
ArtifactIf if_stmt(cond);
list<shared_ptr<ArtifactExpr>> dims{dim1, dim2};
list<shared_ptr<ArtifactExpr>> initializers{AF::lit("123")};
shared_ptr<ArtifactClassVariable> var_decl =
- cls.var(is_public, var_type, var_name, dims, initializers);
+ cls.var(is_public, var_type, var_name, dims, initializers);
return var_decl;
}
const char *code_prefix = "#include \"module.h\"\n\n#include <list>\n\n#include \"bar.h\"\n\n";
const char *code_suffix = "\nClass::Class() {\n}\n\n";
- string ref_data = string(code_prefix) +
- string(AclArtifactUtilities, sizeof(AclArtifactUtilities)) + code_suffix;
+ string ref_data =
+ string(code_prefix) + string(AclArtifactUtilities, sizeof(AclArtifactUtilities)) + code_suffix;
m.accept(&code_gen);
ASSERT_EQ(code_out.str(), ref_data);
// check ordinary includes, like '#include "artifact_data.h"'
checkHeadersSetsEqual(
- m.headerIncludes(),
- {"arm_compute/core/Types.h", "arm_compute/runtime/BlobLifetimeManager.h",
- "arm_compute/runtime/CL/CLBufferAllocator.h", "arm_compute/runtime/CL/CLFunctions.h",
- "arm_compute/runtime/CL/CLScheduler.h", "arm_compute/runtime/MemoryManagerOnDemand.h",
- "arm_compute/runtime/PoolManager.h"},
- "system header includes diverged");
+ m.headerIncludes(),
+ {"arm_compute/core/Types.h", "arm_compute/runtime/BlobLifetimeManager.h",
+ "arm_compute/runtime/CL/CLBufferAllocator.h", "arm_compute/runtime/CL/CLFunctions.h",
+ "arm_compute/runtime/CL/CLScheduler.h", "arm_compute/runtime/MemoryManagerOnDemand.h",
+ "arm_compute/runtime/PoolManager.h"},
+ "system header includes diverged");
checkHeadersSetsEqual(m.sourceSysIncludes(), {}, "system source includes diverged");
}
Graph g;
OpConstructor op_generator =
- [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
- auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
- return g.create<mir::ops::Conv2DOp>(inputs[0], kernel, mir::Conv2DOpAttributes());
- };
+ [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
+ return g.create<mir::ops::Conv2DOp>(inputs[0], kernel, mir::Conv2DOpAttributes());
+ };
vector<Shape> input_shapes{{1, 10, 10, channels}};
Graph g;
OpConstructor op_generator =
- [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
- Conv2DOpAttributes attributes;
- auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
- return g.create<mir::ops::DepthwiseConv2DOp>(inputs[0], kernel, attributes);
- };
+ [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ Conv2DOpAttributes attributes;
+ auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
+ return g.create<mir::ops::DepthwiseConv2DOp>(inputs[0], kernel, attributes);
+ };
vector<Shape> input_shapes{{1, 10, 10, channels}};
Operation *tr1 = g.create<ops::TransposeOp>(in1->getOutput(0), vector<size_t>{0, 3, 1, 2});
Operation *tr2 = g.create<ops::TransposeOp>(in2->getOutput(0), vector<size_t>{0, 3, 1, 2});
Operation *conc =
- g.create<ops::ConcatOp>(vector<Operation::Output *>{tr1->getOutput(0), tr2->getOutput(0)}, 1);
+ g.create<ops::ConcatOp>(vector<Operation::Output *>{tr1->getOutput(0), tr2->getOutput(0)}, 1);
Operation *tanh = g.create<ops::TanhOp>(conc->getOutput(0));
Operation *out = g.create<ops::OutputOp>(tanh->getOutput(0));
(void)out;
Operation *relu1 = g.create<ops::ReluOp>(in1->getOutput(0));
Operation *relu2 = g.create<ops::ReluOp>(in2->getOutput(0));
Operation *conc = g.create<ops::ConcatOp>(
- vector<Operation::Output *>{relu1->getOutput(0), relu2->getOutput(0)}, 1);
+ vector<Operation::Output *>{relu1->getOutput(0), relu2->getOutput(0)}, 1);
Operation *tanh = g.create<ops::TanhOp>(conc->getOutput(0));
Operation *out = g.create<ops::OutputOp>(tanh->getOutput(0));
(void)out;
* @brief Creates graph with one operation generated by opGen function and returns this operation
* node
*/
-mir::Operation *
-fillGraph(mir::Graph &g,
- const function<mir::Operation *(mir::Graph &g, vector<mir::Operation::Output *> &inputs)>
- &op_gen,
- const vector<unique_ptr<mir::TensorVariant>> &input_ntensors)
+mir::Operation *fillGraph(
+ mir::Graph &g,
+ const function<mir::Operation *(mir::Graph &g, vector<mir::Operation::Output *> &inputs)> &op_gen,
+ const vector<unique_ptr<mir::TensorVariant>> &input_ntensors)
{
// Create operation inputs.
vector<mir::Operation::Output *> inputs;
float ref_data = mir::Tensor<float>(ref_nnc_tensor).at(nnc_idx);
float test_data = test_art_tensor.at(artifact_idx);
ASSERT_TRUE(areFloatsNear(ref_data, test_data, 32, 1e-5))
- << "Tensor element " << nnc_idx << " diverged, reference: " << ref_data
- << " test result: " << test_data;
+ << "Tensor element " << nnc_idx << " diverged, reference: " << ref_data
+ << " test result: " << test_data;
}
}
*/
template <typename TestFunc, typename... Args>
void createAndRunTestGraph(
- function<mir::Operation *(mir::Graph &, const std::vector<mir::Operation::Output *> &inputs)>
- op_generator,
- TestFunc artifactOperation, const vector<unique_ptr<mir::TensorVariant>> &input_ntensors,
- Args &... input_atensors)
+ function<mir::Operation *(mir::Graph &, const std::vector<mir::Operation::Output *> &inputs)>
+ op_generator,
+ TestFunc artifactOperation, const vector<unique_ptr<mir::TensorVariant>> &input_ntensors,
+ Args &... input_atensors)
{
mir::Graph g;
mir::Operation *actual_operation = fillGraph(g, op_generator, input_ntensors);
auto op_generator = [&res_shape](mir::Graph &g,
const std::vector<mir::Operation::Output *> &inputs) {
return g.create<mir::ops::ResizeOp>(
- inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
+ inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
};
createAndRunTestGraph(op_generator, resize, input_ntensors, input_atensor);
{
cout << "\n";
std::vector<float> test_scales[] = {
- {1, 2, 2, 1}, {1, 2, 3, 1}, {1, 3, 2, 1}, {1, 2.5, 2, 1}, {1, 3, 9, 1}};
+ {1, 2, 2, 1}, {1, 2, 3, 1}, {1, 3, 2, 1}, {1, 2.5, 2, 1}, {1, 3, 9, 1}};
for (const std::vector<float> &scales : test_scales)
{
vector<int> input_shape_data{1, 4, 4, 1};
auto op_generator = [&scales](mir::Graph &g,
const std::vector<mir::Operation::Output *> &inputs) {
return g.create<mir::ops::ResizeOp>(
- inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales);
+ inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales);
};
createAndRunTestGraph(op_generator, resize, input_ntensors, input_atensor);
}
for (const auto include_pad : {false, true})
{
attributes.include_pad = include_pad;
- auto op_generator = [&attributes](
- mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
- return g.create<mir::ops::AvgPool2DOp>(inputs[0], attributes);
- };
+ auto op_generator =
+ [&attributes](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::AvgPool2DOp>(inputs[0], attributes);
+ };
createAndRunTestGraph(op_generator, avgPool, input_ntensors, input_atensor);
}
vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
- auto op_generator = [&window_size, &strides](
- mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ auto op_generator = [&window_size,
+ &strides](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
mir::MaxPool2DOpAttributes attributes;
attributes.window = window_size;
attributes.strides = strides;
vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
fillTensors(input_ntensors[0], input_atensor, input_shape_data, 1.0f);
auto op_generator = [&axis_list, keep_dims](
- mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
auto op = g.create<mir::ops::ReduceMeanOp>(inputs[0], axis_list, keep_dims);
return op;
};
vector<int> shape_data{5, 30, 40, 12};
vector<int> starts[] = {{0, 0, 0, 0}, {1, 1, 1, 1}, {1, 0, 1, 0}, {0, 1, 1, 0}};
vector<int> sizes[] = {
- {-1, -1, -1, -1}, {4, -1, 10, -1},
+ {-1, -1, -1, -1},
+ {4, -1, 10, -1},
};
for (auto st : starts)
{
// test option with default negative value
Option<int32_t>
- NDefaultNegOpt(optname("-default_neg_val"),
- overview("description of integer option with default negative value"), -33);
+ NDefaultNegOpt(optname("-default_neg_val"),
+ overview("description of integer option with default negative value"), -33);
// test option with positive values
Option<uint32_t> NPosOpt(optname("-pos_val"),
overview("description of integer option with positive value"), 1,
{
// create command line
const char *argv[] = {
- "CLTest", // program name
- // string options
- "-m", "multiopt_value", // second name for option with several names
- "--single", "single_value", // option with single name
- "-several_separators:SOME_VALUE1,SOME_VALUE2", // test option with several separators
- "--one_separarot=AAA_VALUE", // test option whit one separator
- "-default_val_opt", // test option with default value
- "--optional_opt", "/home/guest/tmp", // test optional option
- "-valid_opt", "value2", // test options with defined values
- // integer options
- "-neg_val", "-42", // test negative value for integer option
- "-default_neg_val", // test integer option with default value
- "-pos_val", "33", // test positive value for integer option
- // char options
- "-char-opt", "b", "-dash_opt", "-",
- // bool options
- "-bool_opt=false", "-bool-opt2",
- // vector of strings options
- "-vec_opt1", "1", "c", "222", "ABC", "857", "-vec_opt2", "--vec_opt_with_vals", "abc", "123",
- "xxx", "abc", "xxx",
- // grouped options
- "-group_opt1", "-group_opt2", "abc", "-group_opt3", "11", nullptr};
+ "CLTest", // program name
+ // string options
+ "-m", "multiopt_value", // second name for option with several names
+ "--single", "single_value", // option with single name
+ "-several_separators:SOME_VALUE1,SOME_VALUE2", // test option with several separators
+    "--one_separarot=AAA_VALUE",                   // test option with one separator
+ "-default_val_opt", // test option with default value
+ "--optional_opt", "/home/guest/tmp", // test optional option
+ "-valid_opt", "value2", // test options with defined values
+ // integer options
+ "-neg_val", "-42", // test negative value for integer option
+ "-default_neg_val", // test integer option with default value
+ "-pos_val", "33", // test positive value for integer option
+ // char options
+ "-char-opt", "b", "-dash_opt", "-",
+ // bool options
+ "-bool_opt=false", "-bool-opt2",
+ // vector of strings options
+ "-vec_opt1", "1", "c", "222", "ABC", "857", "-vec_opt2", "--vec_opt_with_vals", "abc", "123",
+ "xxx", "abc", "xxx",
+ // grouped options
+ "-group_opt1", "-group_opt2", "abc", "-group_opt3", "11", nullptr};
int argc = (sizeof(argv) / sizeof(argv[0])) - 1;
// It must be failed if option is not passed and other options are in the same group
attributes.padding_before = {67, 123};
attributes.padding_after = {32, 356};
auto *dw_conv =
- g.create<mir::ops::DepthwiseConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
+ g.create<mir::ops::DepthwiseConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
auto *output = g.create<mir::ops::OutputOp>(dw_conv->getOutput(0));
attributes.padding_before = {31, 72};
attributes.padding_after = {32, 71};
auto *deconv =
- g.create<mir::ops::DeConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
+ g.create<mir::ops::DeConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
auto *output = g.create<mir::ops::OutputOp>(deconv->getOutput(0));
add_library(nnkit_caffe_backend SHARED Module.cpp)
target_link_libraries(nnkit_caffe_backend nnkit_support_caffe)
-target_link_libraries(nnkit_caffe_backend stdex)
#include "nnkit/support/caffe/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- using stdex::make_unique;
+ using std::make_unique;
auto net = make_unique<::caffe::Net<float>>(args.at(0), caffe::TEST);
const nncc::core::ADT::tensor::Reader<T> &)>;
template <typename T>
- using TypedAccessor = std::function<void(const TensorContext &, uint32_t n,
- nncc::core::ADT::tensor::Accessor<T> &)>;
+ using TypedAccessor =
+ std::function<void(const TensorContext &, uint32_t n, nncc::core::ADT::tensor::Accessor<T> &)>;
virtual ~TensorContext() = default;
target_include_directories(nnkit_support_backend PUBLIC include)
target_link_libraries(nnkit_support_backend PUBLIC nnkit_intf_backend)
target_link_libraries(nnkit_support_backend PUBLIC dl)
-target_link_libraries(nnkit_support_backend PUBLIC stdex)
find_package(Threads QUIET)
#include "nnkit/BackendPlugin.h"
#include <cassert>
-#include <stdex/Memory.h>
+#include <memory>
#include <iostream>
// NOTE dlfcn.h is not a standard library
exit(1);
}
- return stdex::make_unique<BackendPlugin>(handle, entry);
+ return std::make_unique<BackendPlugin>(handle, entry);
}
} // namespace nnkit
#include "nnkit/support/moco/tf/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- using stdex::make_unique;
+ using std::make_unique;
assert(args.size() == 2); // args.at[0] : *.pb path, args.at[1]: *.info path
add_library(nnkit_moco_tf_backend SHARED Backend.cpp)
target_link_libraries(nnkit_moco_tf_backend nnkit_support_moco_tf)
-target_link_libraries(nnkit_moco_tf_backend stdex)
-require("stdex")
# To use "nnkit_support_tftestinfo"
require("tfinfo")
require("loco")
target_link_libraries(nnkit_support_moco_tf locomotiv)
target_link_libraries(nnkit_support_moco_tf moco_tf_frontend)
target_link_libraries(nnkit_support_moco_tf loco)
-target_link_libraries(nnkit_support_moco_tf stdex)
#include <moco/tf/Frontend.h>
#include <moco/Names.h>
-#include <stdex/Memory.h>
#include <nncc/core/ADT/tensor/Buffer.h>
#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <memory>
#include <utility> // std::move
#include <stdexcept>
// set member vars
_loco_graph = std::move(loco_graph);
- _sess = stdex::make_unique<locomotiv::Session>(_loco_graph.get());
+ _sess = std::make_unique<locomotiv::Session>(_loco_graph.get());
}
void Backend::prepare(const std::function<void(nnkit::TensorContext &)> &f)
for (int n = 0; n < _inputs.size(); n++)
{
auto buf = make_buffer<float, LexicalLayout>(_inputs.at(n)->shape());
- buf_list.emplace_back(stdex::make_unique<nncc::core::ADT::tensor::Buffer<float>>(buf));
+ buf_list.emplace_back(std::make_unique<nncc::core::ADT::tensor::Buffer<float>>(buf));
}
// fill test input values
}
void InputTensorContext::getConstFloatTensor(
- uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
+ uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
{
auto buf = _buffers.at(n).get();
f(*this, n, *buf);
public:
InputTensorContext(const ParsedTensors &parsed_tensors, const Buffers &buffers)
- : TensorContext(parsed_tensors), _buffers(buffers)
+ : TensorContext(parsed_tensors), _buffers(buffers)
{ /* empty */
}
{
void OutputTensorContext::getConstFloatTensor(
- uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
+ uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
{ // for output
using nncc::core::ADT::tensor::LexicalLayout;
using nncc::core::ADT::tensor::make_overlay;
{
public:
OutputTensorContext(const ParsedTensors &parsed_tensors, locomotiv::Session *sess)
- : TensorContext(parsed_tensors), _sess(sess)
+ : TensorContext(parsed_tensors), _sess(sess)
{ /* empty */
}
#include "nnkit/support/onnx/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
assert(args.size() == 1); // args.at[0] : onnx file
- return stdex::make_unique<::nnkit::support::onnx::Backend>(args.at(0));
+ return std::make_unique<::nnkit::support::onnx::Backend>(args.at(0));
}
add_library(nnkit_onnx_backend SHARED Backend.cpp)
target_link_libraries(nnkit_onnx_backend nnkit_support_onnx)
-target_link_libraries(nnkit_onnx_backend stdex)
-require("stdex")
require("nnkit-intf")
target_include_directories(nnkit_support_onnx-1.4 PUBLIC include)
target_link_libraries(nnkit_support_onnx-1.4 nnkit_intf_backend)
target_link_libraries(nnkit_support_onnx-1.4 onnxruntime)
-target_link_libraries(nnkit_support_onnx-1.4 stdex)
add_library(nnkit_support_onnx ALIAS nnkit_support_onnx-1.4)
{
public:
TensorSet(Allocator *allocator, size_t nums)
- : _allocator(allocator), _names(nums), _types(nums), _dims(nums), _tensors(nums, nullptr)
+ : _allocator(allocator), _names(nums), _types(nums), _dims(nums), _tensors(nums, nullptr)
{
// DO NOTHING
}
Status status;
status =
- OrtCreateTensorAsOrtValue(_allocator, dims.data(), dims.size(), type, &_tensors[index]);
+ OrtCreateTensorAsOrtValue(_allocator, dims.data(), dims.size(), type, &_tensors[index]);
status.throwOnError();
assert(OrtIsTensor(_tensors[index]));
#include "nnkit/support/onnx/Runner.h"
#include "nnkit/support/onnx/Status.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
namespace nnkit
namespace onnx
{
-Runner::Runner(const std::string &path) : _allocator(stdex::make_unique<Allocator>())
+Runner::Runner(const std::string &path) : _allocator(std::make_unique<Allocator>())
{
Status status;
status = OrtSessionGetInputCount(_session, &num_input_nodes);
status.throwOnError();
- _inputs = stdex::make_unique<TensorSet>(_allocator.get(), num_input_nodes);
+ _inputs = std::make_unique<TensorSet>(_allocator.get(), num_input_nodes);
for (size_t i = 0; i < num_input_nodes; ++i)
{
status = OrtSessionGetOutputCount(_session, &num_output_nodes);
status.throwOnError();
- _outputs = stdex::make_unique<TensorSet>(_allocator.get(), num_output_nodes);
+ _outputs = std::make_unique<TensorSet>(_allocator.get(), num_output_nodes);
for (size_t i = 0; i < num_output_nodes; ++i)
{
#include "nnkit/support/tf/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- using stdex::make_unique;
+ using std::make_unique;
assert(args.size() == 2); // args.at[0] : test.pb path, args.at[1] : test.info path
add_library(nnkit_tf_backend SHARED Backend.cpp)
target_link_libraries(nnkit_tf_backend nnkit_support_tf)
-target_link_libraries(nnkit_tf_backend stdex)
-require("stdex")
require("tfinfo")
require("nnkit-intf")
add_library(nnkit_support_tf-1.13 STATIC ${SOURCES})
set_target_properties(nnkit_support_tf-1.13 PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(nnkit_support_tf-1.13 PUBLIC include)
-target_link_libraries(nnkit_support_tf-1.13 nnkit_intf_backend stdex nnkit_support_tftestinfo)
+target_link_libraries(nnkit_support_tf-1.13 nnkit_intf_backend nnkit_support_tftestinfo)
target_link_libraries(nnkit_support_tf-1.13 tensorflow-1.13)
add_library(nnkit_support_tf ALIAS nnkit_support_tf-1.13)
{
public:
TensorContext(const std::vector<std::unique_ptr<ParsedTensor>> &tensors, TensorDataMap &data_map)
- : _tensors(tensors), _data_map(data_map)
+ : _tensors(tensors), _data_map(data_map)
{
// empty
}
class TensorDataMap
{
public:
- TensorDataMap() { /* empty */}
+ TensorDataMap()
+ { /* empty */
+ }
uint8_t *allocate(const ParsedTensor *parsed_tensor)
{
angkor::TensorShape shape;
if (!_tf_runner.getTensorShapeFromGraphDef(parsed_tensor, shape))
throw oops::UserExn(
- "Info you provided may be wrong or not enough. Please check the info file.");
+ "Info you provided may be wrong or not enough. Please check the info file.");
parsed_tensor->mutable_shape().resize(shape.rank());
for (int r = 0; r < shape.rank(); r++)
throw std::runtime_error("Not supported tensor type");
TF_Tensor *input_tensor =
- create_tensor(TF_FLOAT, shape.data(), shape.size(), data_map.data(tensor.get()),
- num_elements(tensor->shape()) * size);
+ create_tensor(TF_FLOAT, shape.data(), shape.size(), data_map.data(tensor.get()),
+ num_elements(tensor->shape()) * size);
_input_ops.emplace_back(input_op);
_input_tensors.emplace_back(input_tensor);
0, // Target operations, number of targets.
nullptr, // Run metadata.
_status // Output status.
- );
+ );
if (TF_GetCode(_status) != TF_OK)
throw std::runtime_error(TF_Message(_status));
std::unique_ptr<::tflite::FlatBufferModel> _model;
std::unique_ptr<::tflite::Interpreter> _interp;
};
-}
+} // namespace
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<GenericBackend>(args.at(0));
+ return std::make_unique<GenericBackend>(args.at(0));
}
add_library(nnkit_tflite_backend SHARED Backend.cpp)
target_link_libraries(nnkit_tflite_backend nnkit_support_tflite)
-target_link_libraries(nnkit_tflite_backend stdex)
-require("stdex")
require("nnkit-intf")
target_include_directories(nnkit_HDF5_export_action PRIVATE ${HDF5_INCLUDE_DIRS})
target_link_libraries(nnkit_HDF5_export_action nnkit_intf_action)
target_link_libraries(nnkit_HDF5_export_action nnkit_HDF5_common)
-target_link_libraries(nnkit_HDF5_export_action stdex)
add_library(nnkit_HDF5_import_action SHARED Import.cpp)
target_include_directories(nnkit_HDF5_import_action PRIVATE ${HDF5_INCLUDE_DIRS})
target_link_libraries(nnkit_HDF5_import_action nnkit_intf_action)
target_link_libraries(nnkit_HDF5_import_action nnkit_HDF5_common)
-target_link_libraries(nnkit_HDF5_import_action stdex)
H5::DataSpace dataspace(rank, dims);
auto dataset =
- _value_grp.createDataSet(value_filename(n), H5::PredType::IEEE_F32BE, dataspace);
+ _value_grp.createDataSet(value_filename(n), H5::PredType::IEEE_F32BE, dataspace);
float *data = new float[nncc::core::ADT::tensor::num_elements(shape)];
H5::StrType name_datatype(H5::PredType::C_S1, name.size());
auto name_attr =
- _name_grp.createAttribute(value_filename(n), name_datatype, name_dataspace);
+ _name_grp.createAttribute(value_filename(n), name_datatype, name_dataspace);
name_attr.write(name_datatype, name);
}
};
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<HD5ExportAction>(args.at(0));
+ return std::make_unique<HD5ExportAction>(args.at(0));
}
};
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<HD5ImportAction>(args.at(0));
+ return std::make_unique<HD5ImportAction>(args.at(0));
}
add_library(nnkit_show_action SHARED Show.cpp)
target_link_libraries(nnkit_show_action nnkit_intf_action)
-target_link_libraries(nnkit_show_action stdex)
add_library(nnkit_randomize_action SHARED Randomize.cpp)
target_link_libraries(nnkit_randomize_action nnkit_intf_action)
-target_link_libraries(nnkit_randomize_action stdex)
};
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<RandomizeAction>();
+ return std::make_unique<RandomizeAction>();
}
}
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<ShowAction>();
+ return std::make_unique<ShowAction>();
}
add_executable(nnkit-benchmark ${SOURCES})
target_link_libraries(nnkit-benchmark nnkit_support_cmdline)
target_link_libraries(nnkit-benchmark nnkit_support_backend)
-target_link_libraries(nnkit-benchmark stdex)
#include <nnkit/VectorArguments.h>
#include <nnkit/BackendPlugin.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <string>
#include <iostream>
#include <iomanip>
-using stdex::make_unique;
+using std::make_unique;
using std::chrono::milliseconds;
using std::chrono::microseconds;
target_link_libraries(nnkit-run nnkit_intf_backend)
target_link_libraries(nnkit-run nnkit_support_cmdline)
target_link_libraries(nnkit-run nnkit_support_backend)
-target_link_libraries(nnkit-run stdex)
private:
nnkit::VectorArguments _args;
};
-}
+} // namespace
namespace
{
std::string _path;
std::unique_ptr<nnkit::BackendPlugin> _plugin;
};
-}
+} // namespace
// TODO Extract Action-related helpers
#include <nnkit/Action.h>
void *_handle;
Entry _entry;
};
-}
+} // namespace
namespace
{
private:
ActionBinder _binder;
};
-}
-
-#include <stdex/Memory.h>
+} // namespace
+#include <memory>
#include <map>
#include <iostream>
std::map<std::string, std::function<void(const std::string &arg)>> argparse;
argparse["--backend"] = [§ions](const std::string &tag) {
- sections.backend = stdex::make_unique<BackendSection>(tag);
+ sections.backend = std::make_unique<BackendSection>(tag);
};
argparse["--backend-arg"] = [§ions](const std::string &arg) {
{
public:
PadInfo(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
- : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
{
// DO NOTHING
}
uint32_t _vertical;
};
-} // namespace nncc
+} // namespace nnop
#endif // __NNOP_STRIDE_INFO_H__
{
RandomModel::RandomModel(int32_t seed)
- : _ifm_shape{1, 8, 8}, _ifm_name{"ifm"}, _ofm_name{"ofm"}, _ofm_shape{2, 6, 6},
- _ker_buffer{kernel::Shape{2, 1, 3, 3}, kernel::NCHWLayout{}}
+ : _ifm_shape{1, 8, 8}, _ifm_name{"ifm"}, _ofm_name{"ofm"}, _ofm_shape{2, 6, 6},
+ _ker_buffer{kernel::Shape{2, 1, 3, 3}, kernel::NCHWLayout{}}
{
std::default_random_engine gen{static_cast<uint32_t>(seed)};
std::normal_distribution<float> dist{0.0f, 1.0f};
add_library(nnsuite_conv_caffe SHARED ${SOURCES})
target_link_libraries(nnsuite_conv_caffe nnsuite_conv)
target_link_libraries(nnsuite_conv_caffe nnkit_support_caffe)
-target_link_libraries(nnsuite_conv_caffe stdex)
nnas_find_package(GTest QUIET)
#include <nncc/core/ADT/kernel/Overlay.h>
#include <nncc/core/ADT/kernel/NCHWLayout.h>
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
std::unique_ptr<nnkit::Backend> ConvBackend::create(const nnsuite::conv::Model &model)
{
TestModel(const std::string &ifm_name, const feature::Shape &ifm_shape,
const std::string &ofm_name, const feature::Shape &ofm_shape,
const kernel::Shape &ker_shape, const kernel::Layout &ker_layout, float *ker_data)
- : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name), _ofm_shape(ofm_shape),
- _ker{ker_shape, ker_layout, ker_data}
+ : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name),
+ _ofm_shape(ofm_shape), _ker{ker_shape, ker_layout, ker_data}
{
// DO NOTHING
}
add_library(nnsuite_conv_tflite SHARED ${SOURCES})
target_link_libraries(nnsuite_conv_tflite nnsuite_conv)
target_link_libraries(nnsuite_conv_tflite nnkit_support_tflite-1.7)
-target_link_libraries(nnsuite_conv_tflite stdex)
nnas_find_package(GTest QUIET)
}
ConvBackend::ConvBackend(const nnsuite::conv::Model &model)
- : _ifm_name{model.ifm_name()}, _ofm_name{model.ofm_name()}
+ : _ifm_name{model.ifm_name()}, _ofm_name{model.ofm_name()}
{
using nncc::core::ADT::kernel::Overlay;
using nncc::core::ADT::kernel::NHWCLayout;
as_dims(model.ifm_shape()), quantization);
_interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "kernel" /* name */, as_dims(model.ker_shape()), quantization,
- reinterpret_cast<const char *>(_kernel.data()), _kernel.size() * sizeof(float));
+ 2, kTfLiteFloat32 /* type */, "kernel" /* name */, as_dims(model.ker_shape()), quantization,
+ reinterpret_cast<const char *>(_kernel.data()), _kernel.size() * sizeof(float));
_interp.SetTensorParametersReadOnly(
- 3, kTfLiteFloat32 /* type */, "bias" /* name */, {static_cast<int>(_bias.size())},
- quantization, reinterpret_cast<const char *>(_bias.data()), _bias.size() * sizeof(float));
+ 3, kTfLiteFloat32 /* type */, "bias" /* name */, {static_cast<int>(_bias.size())}, quantization,
+ reinterpret_cast<const char *>(_bias.data()), _bias.size() * sizeof(float));
auto param = typed_malloc<TfLiteConvParams>();
TestModel(const std::string &ifm_name, const feature::Shape &ifm_shape,
const std::string &ofm_name, const feature::Shape &ofm_shape,
const kernel::Shape &ker_shape, const kernel::Layout &ker_layout, float *ker_data)
- : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name), _ofm_shape(ofm_shape),
- _ker{ker_shape, ker_layout, ker_data}
+ : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name),
+ _ofm_shape(ofm_shape), _ker{ker_shape, ker_layout, ker_data}
{
// DO NOTHING
}
#include <nnkit/Backend.h>
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <chrono>
#include <iostream>
const nnsuite::conv::RandomModel model{seed};
- return stdex::make_unique<ConvBackend>(model);
+ return std::make_unique<ConvBackend>(model);
}
one-import-bcq
one-import-tf
one-import-tflite
+ one-import-onnx
one-optimize
one-quantize
one-pack
return()
endif(NOT ENABLE_TEST)
+add_subdirectory(dummy-driver)
add_subdirectory(tests)
+add_subdirectory(validate-onnx2circle)
--- /dev/null
+# dummy driver for interface test
+set(DUMMY_DRIVER_SRC src/dummy-compile.cpp)
+set(HELP_DRIVER_SRC src/help-compile.cpp)
+
+add_executable(dummy-compile ${DUMMY_DRIVER_SRC})
+add_executable(help-compile ${HELP_DRIVER_SRC})
+
+set(DUMMY_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/dummy-compile")
+set(HELP_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/help-compile")
+
+install(FILES ${DUMMY_DRIVER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${HELP_DRIVER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-compile only tests its interface rather than its functionality.
+ *
+ * ./dummy-compile -o ${OUTPUT_NAME} ${INPUT_NAME}
+ *
+ * NOTE argv[3](INPUT_NAME) is not used here.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 4)
+ return EXIT_FAILURE;
+
+ std::string opt_o{"-o"};
+ std::string argv_1{argv[1]};
+
+ if (opt_o != argv_1)
+ return EXIT_FAILURE;
+
+ std::string output_name{argv[2]};
+ std::ofstream outfile(output_name);
+
+ outfile << "dummy-compile dummy output!!" << std::endl;
+
+ outfile.close();
+
+ return EXIT_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * help-compile prints dummy help message.
+ *
+ * $ ./help-compile -h
+ * HELP MESSAGE!!
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::string opt_h{"-h"};
+ std::string argv_1{argv[1]};
+
+ if (opt_h != argv_1)
+ return EXIT_FAILURE;
+
+ std::cout << "HELP MESSAGE!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
version 2.3.0, recommended 2.x version as of now, so that 'one-import-tf'
command can execute properly.
+'one-prepare-venv' will also prepare onnx and onnx-tensorflow version 1.7.0 so
+that 'one-import-onnx' command can execute properly.
+
Prerequisite
------------
one-optimize provides the network or operator transformations shown below.
Current transformation options are
+- disable_validation : This will turn off operator validations.
+- fold_add_v2 : This removes AddV2 operation which can be folded
+- fold_cast : This removes Cast operation which can be folded
- fold_dequantize : This removes Dequantize operation which can be folded
+- fold_sparse_to_dense : This removes SparseToDense operation which can be folded
+- forward_reshape_to_unaryop: This will move Reshape after UnaryOp for certain conditions
- fuse_add_with_tconv: This fuses Add operator with the preceding TConv operator if possible
+- fuse_batchnorm_with_conv : This fuses BatchNorm operator to convolution operator
+- fuse_batchnorm_with_dwconv : This fuses BatchNorm operator to depthwise convolution operator
+- fuse_batchnorm_with_tconv : This fuses BatchNorm operator to transpose convolution operator
- fuse_bcq: This enables Binary-Coding-based Quantized DNNs
- read https://arxiv.org/abs/2005.09904 for detailed information
- fuse_instnorm: This will convert instance normalization related operators to
- make_batchnorm_gamma_positive: This makes negative gamma of batch normalization into a small positive value (1e-10).
Note that this pass can change the execution result of the model.
So, use it only when the impact is known to be acceptable.
+- mute_warnings : This will turn off warning messages.
+- generate_profile_data : This will turn on profiling data generation.
+- remove_redundant_reshape : This fuses or removes redundant reshape operators.
+- remove_redundant_transpose : This fuses or removes redundant transpose operators.
+- remove_unnecessary_reshape : This removes unnecessary reshape operators.
+- remove_unnecessary_slice : This removes unnecessary slice operators.
+- remove_unnecessary_strided_slice : This removes unnecessary strided slice operators.
+- remove_unnecessary_split : This removes unnecessary split operators.
- replace_cw_mul_add_with_depthwise_conv: This will replace channel-wise Mul/Add with DepthwiseConv2D.
- resolve_customop_add: This will convert Custom(Add) to normal Add operator
- resolve_customop_batchmatmul: This will convert Custom(BatchMatMul) to
normal BatchMatMul operator
- resolve_customop_matmul: This will convert Custom(MatMul) to normal MatMul
operator
+- shuffle_weight_to_16x1float32 : This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32.
+ Note that it only converts weights whose row is a multiple of 16.
+- substitute_pack_to_reshape : This will convert single input Pack to Reshape.
+- substitute_squeeze_to_reshape : This will convert Squeeze to Reshape under certain conditions.
+- substitute_transpose_to_reshape : This will convert Transpose to Reshape under certain conditions.
+- transform_min_max_to_relu6: This will transform Minimum-Maximum pattern to Relu6 operator.
+
+For convenience, there are grouped options that enable several optimizations at once (see the example after this list).
+- O1: fuse_bcq, fuse_instnorm, resolve_customop_add, resolve_customop_batchmatmul,
+ resolve_customop_matmul, remove_redundant_transpose, substitute_pack_to_reshape
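
As an illustrative sketch, the grouped option can be passed to one-optimize
directly on the command line (the file names below are only examples, matching
the configuration samples used elsewhere in this change):

  one-optimize --O1 \
    --input_path inception_v3.circle \
    --output_path inception_v3.opt.circle
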
one-quantize
''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
#
'one-import-bcq': 'one-import-bcq',
'one-import-tf': 'one-import-tf',
'one-import-tflite': 'one-import-tflite',
+ 'one-import-onnx': 'one-import-onnx',
'one-optimize': 'one-optimize',
+ 'one-quantize': 'one-quantize',
'one-pack': 'one-pack',
'one-codegen': 'one-codegen'
}[driver_name]
def _verify_cfg(driver_list, config):
if not config.has_section('one-build'):
- raise ImportError('\'one-build\' section is required in configuraion file')
+        raise ImportError('[one-build] section is required in configuration file')
import_driver_cnt = 0
if _is_available_driver(config, 'one-import-tf'):
import_driver_cnt += 1
if _is_available_driver(config, 'one-import-bcq'):
import_driver_cnt += 1
+ if _is_available_driver(config, 'one-import-onnx'):
+ import_driver_cnt += 1
if import_driver_cnt > 1:
raise AssertionError('Only one import-* driver can be executed')
# verify configuration file
drivers = [
- 'one-import-tf', 'one-import-tflite', 'one-import-bcq', 'one-optimize',
- 'one-quantize', 'one-pack', 'one-codegen'
+ 'one-import-tf', 'one-import-tflite', 'one-import-bcq', 'one-import-onnx',
+ 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen'
]
_verify_cfg(drivers, config)
one-import-tf=True
one-import-tflite=False
one-import-bcq=False
+one-import-onnx=False
one-optimize=True
one-quantize=False
one-pack=True
[one-optimize]
input_path=inception_v3.circle
output_path=inception_v3.opt.circle
+generate_profile_data=False
[one-pack]
input_path=inception_v3.opt.circle
# limitations under the License.
import argparse
+import copy
+import itertools
import os
import subprocess
import sys
def _get_parser():
- parser = argparse.ArgumentParser(description='command line tool for code generation')
+ codegen_usage = 'one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] [--] [COMMANDS FOR BACKEND]'
+ parser = argparse.ArgumentParser(
+ description='command line tool for code generation', usage=codegen_usage)
_utils._add_default_arg(parser)
def _parse_arg(parser):
- args, unknown_args = parser.parse_known_args()
+ codegen_args = []
+ backend_args = []
+ unknown_args = []
+ argv = copy.deepcopy(sys.argv)
+ # delete file name
+ del argv[0]
+ # split by '--'
+ args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+ # one-codegen has two interfaces
+ # 1. one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] [COMMANDS FOR BACKEND]
+ if len(args) == 1:
+ codegen_args = args[0]
+ codegen_args, unknown_args = parser.parse_known_args(codegen_args)
+ # 2. one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] -- [COMMANDS FOR BACKEND]
+ if len(args) == 2:
+ codegen_args = args[0]
+ backend_args = args[1]
+ codegen_args = parser.parse_args(codegen_args)
# print version
- if args.version:
+ if len(args) and codegen_args.version:
_utils._print_version_and_exit(__file__)
- return args, unknown_args
+ return codegen_args, backend_args, unknown_args
def main():
# parse arguments
parser = _get_parser()
- args, unknown_args = _parse_arg(parser)
+ args, backend_args, unknown_args = _parse_arg(parser)
# parse configuration file
_utils._parse_cfg(args, 'one-codegen')
# make a command to run given backend driver
dir_path = os.path.dirname(os.path.realpath(__file__))
codegen_path = os.path.join(dir_path, getattr(args, 'backend') + '-compile')
- codegen_cmd = [codegen_path] + unknown_args
+ codegen_cmd = [codegen_path] + backend_args + unknown_args
if _utils._is_valid_attr(args, 'command'):
codegen_cmd += getattr(args, 'command').split()
bufsize=1) as p:
for line in p.stdout:
sys.stdout.buffer.write(line)
+ if p.returncode != 0:
+ sys.exit(p.returncode)
if __name__ == '__main__':
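
For reference, the two invocation forms accepted by the parser above look as
follows; the dummy backend and file names are the ones used by the tests added
in this change:

  # 1. backend arguments follow the one-codegen options directly
  one-codegen -b dummy -o sample.tvn dummy.circle

  # 2. an explicit '--' separates one-codegen options from backend arguments
  one-codegen -b dummy -- -o sample.tvn dummy.circle
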
--- /dev/null
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import subprocess
+import sys
+import tempfile
+import onnx
+import onnx_tf
+
+import utils as _utils
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to convert ONNX to circle')
+
+ _utils._add_default_arg(parser)
+
+ ## tf2tfliteV2 arguments
+ tf2tfliteV2_group = parser.add_argument_group('converter arguments')
+
+ # input and output path.
+ tf2tfliteV2_group.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input file')
+ tf2tfliteV2_group.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output file')
+
+ # input and output arrays.
+ tf2tfliteV2_group.add_argument(
+ '-I',
+ '--input_arrays',
+ type=str,
+ help='names of the input arrays, comma-separated')
+ tf2tfliteV2_group.add_argument(
+ '-O',
+ '--output_arrays',
+ type=str,
+ help='names of the output arrays, comma-separated')
+
+ # fixed options
+ tf2tfliteV2_group.add_argument('--model_format', default='saved_model')
+ tf2tfliteV2_group.add_argument('--converter_version', default='v2')
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments is given
+ missing = []
+ if not _utils._is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not _utils._is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ _utils._print_version_and_exit(__file__)
+
+ return args
+
+
+def _convert(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ # convert onnx to tf saved model
+ onnx_model = onnx.load(getattr(args, 'input_path'))
+ tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
+
+ savedmodel_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.savedmodel'
+ savedmodel_output_path = os.path.join(tmpdir, savedmodel_name)
+ tf_savedmodel.export_graph(savedmodel_output_path)
+
+ # make a command to convert from tf to tflite
+ tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py')
+ tf2tfliteV2_output_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.tflite'
+ tf2tfliteV2_output_path = os.path.join(tmpdir, tf2tfliteV2_output_name)
+
+ tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd(
+ args, tf2tfliteV2_path, savedmodel_output_path, tf2tfliteV2_output_path)
+
+ f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
+
+ # convert tf to tflite
+ with subprocess.Popen(
+ tf2tfliteV2_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+ bufsize=1) as p:
+ for line in p.stdout:
+ sys.stdout.buffer.write(line)
+ f.write(line)
+ if p.returncode != 0:
+ sys.exit(p.returncode)
+
+ # make a command to convert from tflite to circle
+ tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
+ tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path,
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
+
+ f.write((' '.join(tflite2circle_cmd) + '\n').encode())
+
+ # convert tflite to circle
+ with subprocess.Popen(
+ tflite2circle_cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ bufsize=1) as p:
+ for line in p.stdout:
+ sys.stdout.buffer.write(line)
+ f.write(line)
+ if p.returncode != 0:
+ sys.exit(p.returncode)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ _utils._parse_cfg(args, 'one-import-onnx')
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # convert
+ _convert(args)
+
+
+if __name__ == '__main__':
+ main()
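
A minimal invocation of the new driver, mirroring the interface test added
below (test_onnx_model.onnx is the model fetched by prepare_test_materials.sh):

  one-import-onnx \
    --input_path test_onnx_model.onnx \
    --output_path test_onnx_model.circle
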
for line in p.stdout:
sys.stdout.buffer.write(line)
f.write(line)
+ if p.returncode != 0:
+ sys.exit(p.returncode)
def main():
_utils._add_default_arg(parser)
+ ## utility arguments
+ utility_group = parser.add_argument_group('arguments for utility')
+
+ utility_group.add_argument(
+ '-p',
+ '--generate_profile_data',
+ action='store_true',
+ help='generate profiling data')
+
## circle2circle arguments
circle2circle_group = parser.add_argument_group('arguments for optimization')
'-o', '--output_path', type=str, help='full filepath of the output file')
# optimization pass
- circle2circle_group.add_argument(
- '--all', action='store_true', help='enable all optimization pass')
- circle2circle_group.add_argument(
- '--fold_dequantize', action='store_true', help='fold Dequantize op')
- circle2circle_group.add_argument(
- '--fuse_add_with_tconv', action='store_true', help='fuse Add op to Transposed')
- circle2circle_group.add_argument(
- '--fuse_batchnorm_with_tconv',
- action='store_true',
- help='fuse BatchNorm op to Transposed Convolution op')
- circle2circle_group.add_argument(
- '--fuse_bcq', action='store_true', help='apply Binary Coded Quantization')
- circle2circle_group.add_argument(
- '--fuse_preactivation_batchnorm',
- action='store_true',
- help='fuse BatchNorm operators of pre-activations to Convolution op')
- circle2circle_group.add_argument(
- '--make_batchnorm_gamma_positive',
- action='store_true',
- help="""make negative gamma of BatchNorm to a small positive value (1e-10).
- Note that this pass can change the execution result of the model.
- So, use it only when the impact is known to be acceptable.""")
- circle2circle_group.add_argument(
- '--fuse_activation_function',
- action='store_true',
- help='fuse Activation function to a preceding operator')
- circle2circle_group.add_argument(
- '--fuse_instnorm', action='store_true', help='fuse ops to InstanceNorm operator')
- circle2circle_group.add_argument(
- '--replace_cw_mul_add_with_depthwise_conv',
- action='store_true',
- help='replace channel-wise Mul/Add with DepthwiseConv2D')
- circle2circle_group.add_argument(
- '--resolve_customop_add',
- action='store_true',
- help='convert Custom(Add) op to Add op')
- circle2circle_group.add_argument(
- '--resolve_customop_batchmatmul',
- action='store_true',
- help='convert Custom(BatchMatmul) op to BatchMatmul op')
- circle2circle_group.add_argument(
- '--resolve_customop_matmul',
- action='store_true',
- help='convert Custom(Matmul) op to Matmul op')
+ for opt in _utils._CONSTANT.OPTIMIZATION_OPTS:
+ # opt = (option_name, help_message)
+ circle2circle_group.add_argument('--' + opt[0], action='store_true', help=opt[1])
return parser
python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
install Pillow==6.2.2
+# Install PyTorch and ONNX related
+python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
+ --trusted-host download.pytorch.org \
+ install torch==1.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+
+# NOTE The latest onnx 1.8.1 has a compatibility issue with onnx-tf 1.7.0,
+# so onnx MUST be pinned to 1.8.0.
+# Allow installing a custom onnx-tf wheel via EXT_ONNX_TF_WHL.
+if [ -n "${EXT_ONNX_TF_WHL}" ]; then
+ python -m pip --default-timeout=1000 install onnx==1.8.0 ${EXT_ONNX_TF_WHL}
+else
+ python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
+ install onnx==1.8.0 onnx-tf==1.7.0
+fi
+
# Create python symbolic link
rm -f ${DRIVER_PATH}/python
ln -s venv/bin/python ${DRIVER_PATH}/python
parser.add_argument(
'-i', '--input_path', type=str, help='full filepath of the input file')
parser.add_argument(
- '-d', '--input_data', type=str, help='full filepath of the input data file')
+ '-d',
+ '--input_data',
+ type=str,
+ help=
+ 'full filepath of the input data file. if not specified, run with random input data.'
+ )
parser.add_argument(
'-o', '--output_path', type=str, help='full filepath of the output file')
+ # argument for profiling
+ parser.add_argument(
+ '-p',
+ '--generate_profile_data',
+ action='store_true',
+ help='generate profiling data')
+
## arguments for quantization
quantization_group = parser.add_argument_group('arguments for quantization')
type=str,
help='record mode (supported: percentile/moving_average, default=percentile)')
- # set default values
- quantization_group.set_defaults(
- input_dtype='float32',
- quantized_dtype='uint8',
- granularity='layer',
- min_percentile='1.0',
- max_percentile='99.0',
- mode='percentile')
-
return parser
+def _set_default_values(args):
+ if not _utils._is_valid_attr(args, 'input_dtype'):
+ setattr(args, 'input_dtype', 'float32')
+ if not _utils._is_valid_attr(args, 'quantized_dtype'):
+ setattr(args, 'quantized_dtype', 'uint8')
+ if not _utils._is_valid_attr(args, 'granularity'):
+ setattr(args, 'granularity', 'layer')
+ if not _utils._is_valid_attr(args, 'mode'):
+ setattr(args, 'mode', 'percentile')
+ if not _utils._is_valid_attr(args, 'min_percentile'):
+ setattr(args, 'min_percentile', '1.0')
+ if not _utils._is_valid_attr(args, 'max_percentile'):
+ setattr(args, 'max_percentile', '99.0')
+
+
def _verify_arg(parser, args):
"""verify given arguments"""
# check if required arguments is given
missing = []
if not _utils._is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'input_data'):
- missing.append('-d/--input_data')
if not _utils._is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
if len(missing):
tmpdir,
os.path.splitext(os.path.basename(args.input_path))[0]) + '1.circle'
circle_quantizer_cmd.append(tmp_output_path_1)
+ # profiling
+ if _utils._is_valid_attr(args, 'generate_profile_data'):
+ circle_quantizer_cmd.append('--generate_profile_data')
f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
if _utils._is_valid_attr(args, 'mode'):
circle_record_minmax_cmd.append('--mode')
circle_record_minmax_cmd.append(getattr(args, 'mode'))
+ # profiling
+ if _utils._is_valid_attr(args, 'generate_profile_data'):
+ circle_record_minmax_cmd.append('--generate_profile_data')
f.write((' '.join(circle_record_minmax_cmd) + '\n').encode())
circle_quantizer_cmd.append(tmp_output_path_2)
if _utils._is_valid_attr(args, 'output_path'):
circle_quantizer_cmd.append(getattr(args, 'output_path'))
+ # profiling
+ if _utils._is_valid_attr(args, 'generate_profile_data'):
+ circle_quantizer_cmd.append('--generate_profile_data')
f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
# parse configuration file
_utils._parse_cfg(args, 'one-quantize')
+ # set default values
+ _set_default_values(args)
+
# verify arguments
_verify_arg(parser, args)
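
With these defaults in place and -d/--input_data no longer required,
one-quantize can be run without an input data file; a minimal sketch
(calibration then uses random input data, and the dtype, granularity and mode
arguments fall back to the defaults set above):

  one-quantize \
    --input_path ./inception_v3.circle \
    --output_path ./inception_v3.random.quantized.circle
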
file(APPEND "${DRIVER_SCRIPT}" " export PATH=$USER_PATH:$PATH\n")
file(APPEND "${DRIVER_SCRIPT}" "fi\n")
file(APPEND "${DRIVER_SCRIPT}" "\n")
+file(APPEND "${DRIVER_SCRIPT}" "# refer https://github.com/Samsung/ONE/issues/6286\n")
+file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n")
+file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n")
foreach(TESTITEM IN ITEMS ${TESTITEMS})
get_filename_component(ITEM_PREFIX ${TESTITEM} NAME_WE)
install(FILES ${CONFIGITEM} DESTINATION test)
endforeach(CONFIGITEM)
-file(APPEND "${DRIVER_SCRIPT}" "popd> /dev/null")
+file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
+
+file(APPEND "${DRIVER_SCRIPT}"
+"if [[ $fail_count != 0 ]]; then
+ echo \"$fail_count TESTS FAILED\"
+ exit 255
+else
+ echo \"ALL TESTS PASSED!\"
+fi\n
+")
set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")
set(PREPROCESS_IMAGES_PY "${CMAKE_CURRENT_SOURCE_DIR}/preprocess_images.py")
# See the License for the specific language governing permissions and
# limitations under the License.
+# one-import-tf -> one-optimize
+
filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"
# See the License for the specific language governing permissions and
# limitations under the License.
+# one-import-tf -> one-optimize -> one-pack
+
filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"
--- /dev/null
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_003.cfg"
+outputfile="inception_v3.quantized.circle"
+
+rm -rf ${outputfile}
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_004.cfg"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=False
+one-import-tflite=True
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-tflite]
+input_path=inception_v3.tflite
+output_path=inception_v3.circle
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.opt.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tflite -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_005.cfg"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=True
+one-pack=False
+one-codegen=True
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-quantize]
+input_path=inception_v3.opt.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.quantized.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize -> one-quantize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_006.cfg"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=True
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-quantize]
+input_path=inception_v3.opt.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
+
+[one-pack]
+input_path=inception_v3.quantized.circle
+output_path=inception_v3_pkg
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize -> one-quantize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_007.cfg"
+outputfile="inception_v3_pkg"
+
+rm -rf ${outputfile}
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.opt.circle
+all=True
+remove_redundant_transpose=True
+
+[one-codegen]
+backend=dummy
+command=-o test_onnx_model.bin test_onnx_model.opt.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_008.cfg"
+outputfile="test_onnx_model.bin"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-onnx]
+input_path=onnx_conv2d_conv2d.onnx
+output_path=onnx_conv2d_conv2d.circle
+
+[one-optimize]
+input_path=onnx_conv2d_conv2d.circle
+output_path=onnx_conv2d_conv2d.opt.circle
+all=True
+remove_redundant_transpose=True
+convert_nchw_to_nhwc=True
+
+[one-codegen]
+backend=dummy
+command=-o onnx_conv2d_conv2d.bin onnx_conv2d_conv2d.opt.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_009.cfg"
+outputfile="onnx_conv2d_conv2d.bin"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
trap_err_onexit()
{
- if grep -q "'one-build' section is required in configuraion file" "${filename}.log"; then
+  if grep -q "\[one-build\] section is required in configuration file" "${filename}.log"; then
echo "${filename_ext} SUCCESS"
exit 0
fi
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# copy help-compile to bin folder
+cp help-compile ../bin/help-compile
+
+# run test
+one-codegen -b help -- -h > ${filename}.log
+
+rm -rf ../bin/help-compile
+
+if grep -q "HELP MESSAGE!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run one-codegen with dummy-compile driver
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-codegen -b dummy -o ${outputfile} "dummy.circle"
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run one-codegen with dummy-compile driver
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-codegen -b dummy -- -o ${outputfile} "dummy.circle"
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# print one-codegen's help message
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-codegen -h > ${filename}.log
+
+if grep -q "command line tool for code generation" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with no input
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: the following arguments are required" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-codegen > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./test_onnx_model.onnx"
+outputfile="./test_onnx_model.circle"
+
+rm -rf ${outputfile}
+rm -rf ${outputfile}.log
+
+# run test
+one-import-onnx \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${outputfile}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-import_005.cfg"
+outputfile="test_onnx_model.circle"
+
+rm -f ${outputfile}
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
fi
# run test
-one-optimize --all \
+one-optimize --O1 \
--input_path ${inputfile} \
--output_path ${outputfile} >> /dev/null
rm -rf ${outputfile}.log
# run test
-one-optimize --all \
+one-optimize --O1 \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log
rm -rf ${outputfile}.log
# run test
-one-optimize --all \
+one-optimize --O1 \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log
fi
# run test
-one-optimize --all \
+one-optimize --O1 \
--input_path "${inputfile}" > "${filename}.log" 2>&1
echo "${filename_ext} FAILED"
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.circle"
+
+rm -rf ${outputfile}
+
+# to create inception_v3.circle
+if [[ ! -s ${inputfile} ]]; then
+ /bin/bash one-import_001.test >> /dev/null
+ return_code=$?
+ if [[ ${return_code} != 0 ]]; then
+ trap_err_onexit
+ fi
+fi
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ./inception_v3.circle \
+--output_path ./inception_v3.random.quantized.circle >> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
# https://github.com/Samsung/ONE/issues/4268#issuecomment-725025805
fi
+if [[ ! -s "test_onnx_model.onnx" ]]; then
+ rm -rf test_onnx_model.zip
+ wget https://github.com/Samsung/ONE/files/5768243/test_onnx_model.zip
+ unzip test_onnx_model.zip
+ # https://github.com/Samsung/ONE/issues/5548#issuecomment-754373360
+fi
+
+if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then
+ rm -rf onnx_conv2d_conv2d.zip
+ wget https://github.com/Samsung/ONE/files/5774648/onnx_conv2d_conv2d.zip
+ unzip onnx_conv2d_conv2d.zip
+ # https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444
+fi
+
# prepare 'inception_v3.circle' file used for quantization test
inputfile="./inception_v3.pb"
outputfile="./inception_v3.circle"
import sys
+class _CONSTANT:
+ __slots__ = () # This prevents access via __dict__.
+ OPTIMIZATION_OPTS = (
+ # (OPTION_NAME, HELP_MESSAGE)
+ ('O1', 'enable O1 optimization pass'),
+ ('convert_nchw_to_nhwc',
+ 'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
+ ),
+ ('nchw_to_nhwc_preserve_input_shape',
+ 'preserve the input shape of the model (argument for convert_nchw_to_nhwc)'),
+ ('nchw_to_nhwc_preserve_output_shape',
+ 'preserve the output shape of the model (argument for convert_nchw_to_nhwc)'),
+ ('fold_add_v2', 'fold AddV2 op with constant inputs'),
+ ('fold_cast', 'fold Cast op with constant input'),
+ ('fold_dequantize', 'fold Dequantize op'),
+ ('fold_sparse_to_dense', 'fold SparseToDense op'),
+ ('forward_reshape_to_unaryop', 'Forward Reshape op'),
+ ('fuse_add_with_tconv', 'fuse Add op to Transposed'),
+ ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
+ ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
+ ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
+ ('fuse_bcq', 'apply Binary Coded Quantization'),
+ ('fuse_preactivation_batchnorm',
+ 'fuse BatchNorm operators of pre-activations to Convolution op'),
+ ('make_batchnorm_gamma_positive',
+ 'make negative gamma of BatchNorm to a small positive value (1e-10).'
+ ' Note that this pass can change the execution result of the model.'
+ ' So, use it only when the impact is known to be acceptable.'),
+ ('fuse_activation_function', 'fuse Activation function to a preceding operator'),
+ ('fuse_instnorm', 'fuse ops to InstanceNorm operator'),
+ ('replace_cw_mul_add_with_depthwise_conv',
+ 'replace channel-wise Mul/Add with DepthwiseConv2D'),
+ ('remove_redundant_reshape', 'fuse or remove subsequent Reshape ops'),
+ ('remove_redundant_transpose', 'fuse or remove subsequent Transpose ops'),
+ ('remove_unnecessary_reshape', 'remove unnecessary reshape ops'),
+ ('remove_unnecessary_slice', 'remove unnecessary slice ops'),
+ ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'),
+ ('remove_unnecessary_split', 'remove unnecessary split ops'),
+ ('resolve_customop_add', 'convert Custom(Add) op to Add op'),
+ ('resolve_customop_batchmatmul',
+ 'convert Custom(BatchMatmul) op to BatchMatmul op'),
+ ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'),
+ ('shuffle_weight_to_16x1float32',
+ 'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
+ ' Note that it only converts weights whose row is a multiple of 16'),
+ ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'),
+ ('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'),
+ ('substitute_transpose_to_reshape',
+ 'convert certain condition Transpose to Reshape'),
+ ('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'))
+
+
+_CONSTANT = _CONSTANT()
+
+
def _add_default_arg(parser):
# version
parser.add_argument(
def _make_circle2circle_cmd(args, driver_path, input_path, output_path):
"""make a command for running circle2circle"""
cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]]
+ # profiling
+ if _is_valid_attr(args, 'generate_profile_data'):
+ cmd.append('--generate_profile_data')
# optimization pass
- if _is_valid_attr(args, 'all'):
- cmd.append('--all')
- if _is_valid_attr(args, 'fold_dequantize'):
- cmd.append('--fold_dequantize')
- if _is_valid_attr(args, 'fuse_add_with_tconv'):
- cmd.append('--fuse_add_with_tconv')
- if _is_valid_attr(args, 'fuse_batchnorm_with_tconv'):
- cmd.append('--fuse_batchnorm_with_tconv')
- if _is_valid_attr(args, 'fuse_bcq'):
- cmd.append('--fuse_bcq')
- if _is_valid_attr(args, 'fuse_instnorm'):
- cmd.append('--fuse_instnorm')
- if _is_valid_attr(args, 'resolve_customop_add'):
- cmd.append('--resolve_customop_add')
- if _is_valid_attr(args, 'resolve_customop_batchmatmul'):
- cmd.append('--resolve_customop_batchmatmul')
- if _is_valid_attr(args, 'resolve_customop_matmul'):
- cmd.append('--resolve_customop_matmul')
+ for opt in _CONSTANT.OPTIMIZATION_OPTS:
+ if _is_valid_attr(args, opt[0]):
+ cmd.append('--' + opt[0])
return cmd
--- /dev/null
+install(FILES validate_onnx2circle.py
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
--- /dev/null
+# validate-onnx2circle
+
+_validate-onnx2circle_ validates the onnx to optimized circle conversion by
+comparing the execution results of the original onnx model and the optimized circle model.
+
+This is currently in experimental state.
+
+## How to run the script
+
+Install `onnxruntime` inside the virtual environment
+```
+source install_path/bin/venv/bin/activate
+
+python -m pip --default-timeout=1000 --trusted-host pypi.org \
+ --trusted-host files.pythonhost.org install onnxruntime==1.6.0
+
+deactivate
+```
+
+Run the script
+```bash
+cd install_path/test
+
+driver='one/build/debug/compiler/luci-eval-driver/luci_eval_driver'
+onnx_filepath='path_to_onnx_model.onnx'
+circle_filepath='path_to_optimized_circle.circle'
+
+./validate_onnx2circle.py --driver ${driver} --onnx ${onnx_filepath} --circle ${circle_filepath}
+```
+
+The output will look something like this
+```
+Run ONNX...
+Run luci-interpreter...
+Compare 0 True
+```
--- /dev/null
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/../bin/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE This is an experimental script to evaluate onnx-circle conversion
+# by running onnxruntime and luci-interpreter.
+# Plan is to run this regularly in CI
+
+import subprocess
+import argparse
+import numpy as np
+import torch
+import onnx
+import onnxruntime as ort
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--onnx', type=str, required=True)
+parser.add_argument('--circle', type=str, required=True)
+args = parser.parse_args()
+
+driver = args.driver
+onnx_filepath = args.onnx
+circle_filepath = args.circle
+
+
+def to_numpy(tensor):
+ return tensor.cpu().numpy()
+
+
+def to_nhwc(tensor):
+ if (tensor.ndim == 4):
+ return np.transpose(tensor, (0, 2, 3, 1))
+ return tensor
+
+
+class OnnxRunner:
+ def __init__(self, filepath):
+ self.filepath = filepath
+ self.session = None
+ self.inputs = None
+ self.inputs_size = None
+ self.inputs_data = None
+ self.outputs = None
+ self.outputs_size = None
+
+ def load(self):
+ model = onnx.load(self.filepath)
+ onnx.checker.check_model(model)
+ self.session = ort.InferenceSession(self.filepath)
+
+ def feed_random_inputs(self):
+ self.inputs = self.session.get_inputs()
+ self.inputs_size = len(self.inputs)
+ # reset input dictionary
+ self.inputs_data = {}
+ for in_idx in range(self.inputs_size):
+ input_shape = self.inputs[in_idx].shape
+ input_type = self.inputs[in_idx].type
+ if input_type == 'tensor(float)':
+ torch_type = torch.float32
+ else:
+ # TODO support other dtype
+ raise SystemExit("Unsupported input dtype")
+
+ x = torch.randn(input_shape, dtype=torch_type)
+ input_npa = to_numpy(x)
+ self.inputs_data.update({self.inputs[in_idx].name: input_npa})
+
+ # save NHWC form of input for luci-interpreter
+ input_npa_nhwc = to_nhwc(input_npa)
+ input_npa_nhwc.tofile(circle_filepath + ".input" + str(in_idx))
+
+ def run(self):
+ self.outs = self.session.run(None, self.inputs_data)
+
+ def get_outputs(self):
+ self.outputs = self.session.get_outputs()
+ self.outputs_size = len(self.outputs)
+
+
+# Run ONNX model
+print("Run ONNX...")
+onnx_runner = OnnxRunner(onnx_filepath)
+onnx_runner.load()
+onnx_runner.feed_random_inputs()
+onnx_runner.run()
+onnx_runner.get_outputs()
+
+# Execute luci interpreter
+print("Run luci-interpreter...")
+process = subprocess.run(
+ [
+ driver, circle_filepath,
+ str(onnx_runner.inputs_size), circle_filepath + ".input",
+ circle_filepath + ".output"
+ ],
+ check=True)
+
+# Compare results
+rtolerance = 1e-03
+atolerance = 1e-04
+result_compare = True
+for idx in range(onnx_runner.outputs_size):
+ output_shape = onnx_runner.outputs[idx].shape
+ output_type = onnx_runner.outputs[idx].type
+ if output_type == 'tensor(float)':
+ output_np_type = np.float32
+ else:
+ # TODO support other dtype
+ raise SystemExit("Unsupported output dtype")
+
+ # output of luci-interpreter
+ output_data = np.fromfile(circle_filepath + ".output" + str(idx), output_np_type)
+ shape_file = open(circle_filepath + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ luci_output_data = np.reshape(output_data, output_shape)
+
+ # output of onnx runtime
+ output_nchw = onnx_runner.outs[idx]
+ output_nhwc = to_nhwc(output_nchw)
+
+    # diff is a boolean tensor marking, per element, whether the value is within tolerance
+ diff = np.isclose(output_nhwc, luci_output_data, rtol=rtolerance, atol=atolerance)
+    # reduce to a single boolean: True only if every element is within tolerance
+ result_compare_one = np.all(diff)
+ print("Compare", idx, result_compare_one)
+ if (not result_compare_one):
+ diff_val = np.subtract(output_nhwc, luci_output_data)
+ print("ONNX Result", output_nhwc)
+ print("Diff", diff_val)
+ print("Diff Max", np.ndarray.max(diff_val))
+
+ result_compare = result_compare and result_compare_one
+
+if (not result_compare):
+ exit(-1)
+
+exit(0)
target_include_directories(moco_onnx_frontend PUBLIC include)
target_link_libraries(moco_onnx_frontend PUBLIC moco_onnx_proto)
target_link_libraries(moco_onnx_frontend PUBLIC loco)
-target_link_libraries(moco_onnx_frontend PRIVATE stdex)
target_link_libraries(moco_onnx_frontend PRIVATE cwrap)
nnas_find_package(GTest QUIET)
-require("stdex")
require("loco")
require("cwrap")
// TODO Make comments clear
void convert_graph(::onnx::ModelProto &onnx_model_proto, loco::Graph *graph)
{
- auto nodes = stdex::make_unique<moco::onnx::SymbolTable>();
- auto input_names = stdex::make_unique<moco::onnx::SymbolTable>();
+ auto nodes = std::make_unique<moco::onnx::SymbolTable>();
+ auto input_names = std::make_unique<moco::onnx::SymbolTable>();
moco::onnx::GraphBuilderContext gb_context(graph, nodes.get(), input_names.get());
{
/**
-* @brief Parent class of onnx operation graph builders
-* @note GraphBuilder call proper build and validate function according to opset version
-*/
+ * @brief Parent class of onnx operation graph builders
+ * @note GraphBuilder call proper build and validate function according to opset version
+ */
class GraphBuilder
{
public:
};
/**
-* @brief Class to store context to build IR from onnx
-*/
+ * @brief Class to store context to build IR from onnx
+ */
class GraphBuilderContext
{
public:
GraphBuilderContext(loco::Graph *g, SymbolTable *nodes, SymbolTable *input_names)
- : _g(g), _nodes(nodes), _input_names(input_names)
+ : _g(g), _nodes(nodes), _input_names(input_names)
{
// DO NOTHING
}
{
/**
-* @brief Class to return graph builder for passed onnx Operator
-*/
+ * @brief Class to return graph builder for passed onnx Operator
+ */
class GraphBuilderRegistry
{
public:
/**
- * @brief Returns registered GraphBuilder pointer for operator or
- * nullptr if not registered
- */
+ * @brief Returns registered GraphBuilder pointer for operator or
+ * nullptr if not registered
+ */
const GraphBuilder *lookup(const std::string &op) const
{
if (_builder_map.find(op) == _builder_map.end())
} // namespace onnx
} // namespace moco
-#include <stdex/Memory.h>
+#include <memory>
-#define REGISTER_OP_BUILDER(NAME, BUILDER) \
- namespace \
- { \
- __attribute__((constructor)) void reg_op(void) \
- { \
- std::unique_ptr<moco::onnx::BUILDER> builder = stdex::make_unique<moco::onnx::BUILDER>(); \
- moco::onnx::GraphBuilderRegistry::get().add(#NAME, std::move(builder)); \
- } \
+#define REGISTER_OP_BUILDER(NAME, BUILDER) \
+ namespace \
+ { \
+ __attribute__((constructor)) void reg_op(void) \
+ { \
+ std::unique_ptr<moco::onnx::BUILDER> builder = std::make_unique<moco::onnx::BUILDER>(); \
+ moco::onnx::GraphBuilderRegistry::get().add(#NAME, std::move(builder)); \
+ } \
}
#endif // __MOCO_FRONTEND_ONNX_GRAPH_BUILDER_REGISTRY_H__
{
/**
- * @brief GraphBuilder for Constant(since version 1) node
- */
+ * @brief GraphBuilder for Constant(since version 1) node
+ */
class Constant_V1
{
public:
};
/**
- * @brief GraphBuilder for Constant(since version 9) node
- * @note Until version 1, only FLOAT16, FLOAT, DOUBLE was supported
- * Since version 9, all types are supported
- */
+ * @brief GraphBuilder for Constant(since version 9) node
+ * @note Until version 1, only FLOAT16, FLOAT, DOUBLE was supported
+ * Since version 9, all types are supported
+ */
class Constant_V9
{
public:
};
/**
- * @brief GraphBuilder for Constant node
- */
+ * @brief GraphBuilder for Constant node
+ */
class ConstantGraphBuilder : public GraphBuilder
{
public:
{
/**
- * @brief GraphBuilder for Identity(since version 1) node
- */
+ * @brief GraphBuilder for Identity(since version 1) node
+ */
class Identity_V1
{
public:
};
/**
- * @brief GraphBuilder for Identity node
- */
+ * @brief GraphBuilder for Identity node
+ */
class IdentityGraphBuilder : public GraphBuilder
{
public:
target_link_libraries(onnx2circle PRIVATE exo)
target_link_libraries(onnx2circle PRIVATE locop)
target_link_libraries(onnx2circle PRIVATE hermes_std)
-target_link_libraries(onnx2circle PRIVATE stdex)
target_link_libraries(onnx2circle PRIVATE angkor cwrap)
target_link_libraries(onnx2circle PRIVATE mir2loco)
target_link_libraries(onnx2circle PRIVATE mir_onnx_importer)
-require("stdex")
require("hermes-std")
require("mir2loco")
require("mir")
#include "hermes/ConsoleReporter.h"
#include "hermes/EnvConfig.h"
-#include "stdex/Memory.h"
-
#include <cassert>
-
+#include <memory>
#include <iostream>
#include <stdexcept>
#include <string>
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("ONNX2CIRCLE_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("ONNX2CIRCLE_Log"));
}
return ctx;
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the exo-circle loggers via ONNX2CIRCLE_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("ONNX2CIRCLE_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("ONNX2CIRCLE_Log_Backend"));
LOGGER(l);
target_link_libraries(onnxkitproto PUBLIC libprotobuf)
add_executable(onnxkit ${SOURCES})
-target_link_libraries(onnxkit PRIVATE stdex)
target_link_libraries(onnxkit PRIVATE cli)
target_link_libraries(onnxkit PRIVATE onnxkitproto)
target_link_libraries(onnxkit PRIVATE nncc_common)
- onnx
- Protobuf
- cli
-- stdex
#include "DecodeCommand.hpp"
#include <cli/App.h>
-#include <stdex/Memory.h>
+
+#include <memory>
int main(int argc, char **argv)
{
cli::App app{argv[0]};
- app.insert("encode", stdex::make_unique<EncodeCommand>());
- app.insert("decode", stdex::make_unique<DecodeCommand>());
+ app.insert("encode", std::make_unique<EncodeCommand>());
+ app.insert("decode", std::make_unique<DecodeCommand>());
return app.run(argc - 1, argv + 1);
}
#include "Support.hpp"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <fstream>
#include <stdexcept>
return nullptr;
}
- auto stream = stdex::make_unique<T>(path.c_str(), mode);
+ auto stream = std::make_unique<T>(path.c_str(), mode);
if (!stream->is_open())
{
throw std::runtime_error{"ERROR: Failed to open " + path};
std::unique_ptr<UI> make_ui(const Cmdline &cmdargs)
{
- auto iocfg = stdex::make_unique<UI>();
+ auto iocfg = std::make_unique<UI>();
auto in = open_fstream<std::ifstream>(cmdargs.get_or(0, "-"), std::ios::in | std::ios::binary);
iocfg->in(std::move(in));
add_library(oops INTERFACE)
target_include_directories(oops INTERFACE include)
target_link_libraries(oops INTERFACE pepper_str)
+target_link_libraries(oops INTERFACE nncc_coverage)
if(NOT ENABLE_TEST)
return()
nnas_find_package(GTest REQUIRED)
-GTest_AddTest(oops_test test.cpp)
+GTest_AddTest(oops_test src/oops.test.cpp)
target_link_libraries(oops_test oops)
{
public:
InternalExn(const char *filename, const int line, const std::string &msg)
- : _filename(filename), _line(to_uint32(line)), _msg(msg)
+ : _filename(filename), _line(to_uint32(line)), _msg(msg)
{
construct_full_msg();
}
explicit InternalExn(const char *filename, const int line, const std::string &msg, uint32_t val)
- : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + std::to_string(val))
+ : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + std::to_string(val))
{
construct_full_msg();
}
explicit InternalExn(const char *filename, const int line, const std::string &msg,
const std::string &val)
- : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + val)
+ : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + val)
{
construct_full_msg();
}
void construct_full_msg()
{
_full_msg =
- "Internal Exception. " + _msg + " [" + _filename + ":" + std::to_string(_line) + "]";
+ "Internal Exception. " + _msg + " [" + _filename + ":" + std::to_string(_line) + "]";
}
std::string _full_msg;
out << pepper::str(attr, " = ", val);
}
- void build_info(std::stringstream &) { /* empty */}
+ void build_info(std::stringstream &)
+ { /* empty */
+ }
// when only one info of string is provided
void build_info(std::stringstream &out, const std::string &val) { out << val; }
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "oops/InternalExn.h"
+#include "oops/UserExn.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void batman() { INTERNAL_EXN("Here comes Joker"); }
+
+void star_wars() { INTERNAL_EXN_V("Something is approaching", "Darth Vader"); }
+
+enum class InfinityStones
+{
+ SpaceStone,
+ RealityStone,
+ OtherStones,
+};
+
+void avengers()
+{
+ std::string where;
+ std::string separator = ":";
+ try
+ {
+ // exception will be raised in next line
+ where = __FILE__ + separator + std::to_string(__LINE__ + 1);
+ INTERNAL_EXN_V("Last stone was gathered", oops::to_uint32(InfinityStones::SpaceStone));
+ }
+ catch (const oops::InternalExn &e)
+ {
+ auto msg = std::string(e.what());
+ ASSERT_TRUE(msg.find("Last stone was gathered: 0") != std::string::npos);
+ ASSERT_TRUE(msg.find(where) != std::string::npos);
+ }
+}
+
+} // namespace
+
+TEST(oopsTest, InternalExn)
+{
+ ASSERT_THROW(batman(), oops::InternalExn);
+ ASSERT_THROW(star_wars(), oops::InternalExn);
+
+ avengers();
+}
+
+TEST(oopsTest, UserExn_one_info_after_msg)
+{
+ try
+ {
+ throw oops::UserExn("Not a member of Avenger", "Kingsman");
+ }
+ catch (const oops::UserExn &e)
+ {
+ auto msg = std::string(e.what());
+ ASSERT_TRUE(msg.find("Not a member of Avenger: Kingsman") != std::string::npos);
+ }
+}
+
+TEST(oopsTest, UserExn_two_pairs_after_msg)
+{
+ try
+ {
+ std::string hero("Spiderman");
+
+ // clang-format off
+ throw oops::UserExn("Hero's age is wrong",
+ "Hero", hero,
+ "Age", 97);
+ // clang-format on
+ }
+ catch (const oops::UserExn &e)
+ {
+ auto msg = std::string(e.what());
+ ASSERT_TRUE(msg.find("Hero = Spiderman, Age = 97") != std::string::npos);
+ }
+}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "oops/InternalExn.h"
-#include "oops/UserExn.h"
-
-#include <gtest/gtest.h>
-
-namespace
-{
-
-void batman() { INTERNAL_EXN("Here comes Joker"); }
-
-void star_wars() { INTERNAL_EXN_V("Something is approaching", "Darth Vader"); }
-
-enum class InfinityStones
-{
- SpaceStone,
- RealityStone,
- OtherStones,
-};
-
-void avengers()
-{
- std::string where;
- std::string separator = ":";
- try
- {
- // exception will be raised in next line
- where = __FILE__ + separator + std::to_string(__LINE__ + 1);
- INTERNAL_EXN_V("Last stone was gathered", oops::to_uint32(InfinityStones::SpaceStone));
- }
- catch (const oops::InternalExn &e)
- {
- auto msg = std::string(e.what());
- ASSERT_TRUE(msg.find("Last stone was gathered: 0") != std::string::npos);
- ASSERT_TRUE(msg.find(where) != std::string::npos);
- }
-}
-
-} // namespace
-
-TEST(oopsTest, InternalExn)
-{
- ASSERT_THROW(batman(), oops::InternalExn);
- ASSERT_THROW(star_wars(), oops::InternalExn);
-
- avengers();
-}
-
-TEST(oopsTest, UserExn_one_info_after_msg)
-{
- try
- {
- throw oops::UserExn("Not a member of Avenger", "Kingsman");
- }
- catch (const oops::UserExn &e)
- {
- auto msg = std::string(e.what());
- ASSERT_TRUE(msg.find("Not a member of Avenger: Kingsman") != std::string::npos);
- }
-}
-
-TEST(oopsTest, UserExn_two_pairs_after_msg)
-{
- try
- {
- std::string hero("Spiderman");
-
- // clang-format off
- throw oops::UserExn("Hero's age is wrong",
- "Hero", hero,
- "Age", 97);
- // clang-format on
- }
- catch (const oops::UserExn &e)
- {
- auto msg = std::string(e.what());
- ASSERT_TRUE(msg.find("Hero = Spiderman, Age = 97") != std::string::npos);
- }
-}
add_library(pepper_str INTERFACE)
target_include_directories(pepper_str INTERFACE include)
+target_link_libraries(pepper_str INTERFACE nncc_coverage)
if(NOT ENABLE_TEST)
return()
# Google Test is mandatory for test
nnas_find_package(GTest REQUIRED)
-GTest_AddTest(pepper_str_test test.cpp)
+GTest_AddTest(pepper_str_test src/pepper-str.test.cpp)
target_link_libraries(pepper_str_test pepper_str)
str_impl(os, std::forward<Args>(args)...);
}
-} // namesapce details
+} // namespace details
} // namespace pepper
namespace pepper
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pepper/str.h"
+
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+TEST(StrTests, README)
+{
+ // Let us check whether the example in README.md works!
+ int argc = 4;
+
+ std::cout << pepper::str("There are ", argc, " arguments") << std::endl;
+
+ SUCCEED();
+}
+
+TEST(StrTests, Empty)
+{
+ // pepper::str() returns an empty string
+ ASSERT_EQ(pepper::str(), "");
+}
+
+TEST(StrTests, Single_Int)
+{
+ // Convert a single "int" value as a string
+ ASSERT_EQ(pepper::str(3), "3");
+}
+
+TEST(StrTests, Concat_000)
+{
+ const int n = 3;
+ const int m = 4;
+
+ ASSERT_EQ(pepper::str(n, "+", m, "=", n + m), "3+4=7");
+}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "pepper/str.h"
-
-#include <iostream>
-
-#include <gtest/gtest.h>
-
-TEST(StrTests, README)
-{
- // Let us check whether the example in README.md works!
- int argc = 4;
-
- std::cout << pepper::str("There are ", argc, " arguments") << std::endl;
-
- SUCCEED();
-}
-
-TEST(StrTests, Empty)
-{
- // pepper::str() returns an empty string
- ASSERT_EQ(pepper::str(), "");
-}
-
-TEST(StrTests, Single_Int)
-{
- // Convert a single "int" value as a string
- ASSERT_EQ(pepper::str(3), "3");
-}
-
-TEST(StrTests, Concat_000)
-{
- const int n = 3;
- const int m = 4;
-
- ASSERT_EQ(pepper::str(n, "+", m, "=", n + m), "3+4=7");
-}
struct imemstream : virtual membuf, std::istream
{
imemstream(char const *base, size_t size)
- : membuf(base, size), std::istream(static_cast<std::streambuf *>(this))
+ : membuf(base, size), std::istream(static_cast<std::streambuf *>(this))
{
}
};
for key in json_load:
if key == "weights":
expected_weights = np.array(json_load["weights"])
- input_weights = tensor["weights"][:]
+ input_weights = tensor["weights"][()]
abs_tolerance = 1
# We use higher tolerance for int64 data (bias of int16-quantized model)
if tensor["weights"].dtype == 'int64':
--- /dev/null
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "scale": 0.00014586378529202193,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "scale": 0.00014956798986531794,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "weights": [
+ 0
+ ]
+}
--- /dev/null
+{
+ "min": -4.909480743408203,
+ "max": 4.779518718719482
+}
--- /dev/null
+{
+ "min": -4.073143873214722,
+ "max": 4.779518718719482
+}
--- /dev/null
+{
+ "min": -4.9008944129943846,
+ "max": 4.620573101043701
+}
--- /dev/null
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
--- /dev/null
+{
+ "scale": 0.035256847739219666,
+ "zero_point": 123.0
+}
--- /dev/null
+{
+ "scale": 0.0385618582367897,
+ "zero_point": 129.0
+}
--- /dev/null
+{
+ "weights": [
+ 0
+ ]
+}
--- /dev/null
+{
+ "min": -4.959668273925781,
+ "max": 4.906183891296386
+}
--- /dev/null
+{
+ "min": -4.3535110282897955,
+ "max": 4.636985759735107
+}
--- /dev/null
+{
+ "min": -4.959668273925781,
+ "max": 4.8736056804656975
+}
addTest(PRelu_001 channel int16)
addTest(ReLU_000 layer uint8)
addTest(ReLU_000 channel int16)
+addTest(Split_000 channel uint8)
+addTest(Split_000 channel int16)
addTest(TransposeConv_001 channel uint8)
addTest(TransposeConv_001 channel int16)
addTest(TransposeConv_001 layer uint8)
--- /dev/null
+ 3.241328 , 2.7033713 ,-2.5329788 ,-4.078369 ,-3.6711028 , 2.8912613 , 0.6188993 , 3.3729403 , 2.9906578 , 0.69040877, 0.6443222 , 1.1676162
--- /dev/null
+ 1.572614 , 3.6147017 , 1.4378501 ,-0.81497866, 1.5987366 , 3.7698908 ,-3.8637109 , 4.5728784 ,-0.8706349 , 0.7389268 , 4.64117 ,-0.96047217
--- /dev/null
+ 0.00864919,-3.1653113 ,-2.125551 , 2.9225516 ,-1.1439148 , 4.6509814 ,-2.097259 , 2.5843353 ,-2.067207 ,-2.5034845 ,-4.9441104 ,-3.9062042
--- /dev/null
+ 1.0920542 , 0.5510192 , 1.3465579 ,-2.3510268 , 4.016736 , 4.7848744 ,-0.42403316, 0.00571597, 1.6412207 , 1.7787368 , 2.4728034 ,-3.5900247
--- /dev/null
+-2.9799085,-3.9477375, 0.6402844, 3.304766 , 3.8880465,-3.5069442,-2.3702915, 4.126247 ,-3.1614416, 2.9909244,-2.8755414, 0.2627986
--- /dev/null
+-2.327701 , 1.9312059 ,-2.0069487 ,-1.2584914 ,-0.08435626, 0.47685367,-2.7456024 , 2.1275337 ,-4.9685698 , 1.8143541 , 0.52829266,-2.770121
--- /dev/null
+ 0.01133719,-3.3741624 , 3.556686 ,-4.21059 , 0.49977505, 1.768375 , 3.867543 , 2.270572 ,-3.9507272 ,-4.595618 ,-4.7460327 , 0.5856542
--- /dev/null
+-2.7181 , 4.6819983 , 2.9022477 ,-0.10716935, 3.6687856 ,-2.5403244 ,-4.477037 , 2.5499978 ,-3.9294813 , 0.08725335,-2.243345 ,-1.4018577
--- /dev/null
+-3.920553 , 0.87464577,-1.0319884 , 2.1885726 , 2.755115 ,-1.6436632 ,-4.4507327 , 4.915525 , 2.9331517 , 4.7712016 , 4.676084 ,-1.7715888
--- /dev/null
+-2.181168 ,-1.6011912 ,-4.359466 ,-1.3662407 ,-0.06876431,-2.9213328 ,-0.5463467 ,-3.7916536 ,-3.751455 ,-2.822578 , 0.8914152 ,-3.0267959
set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(pp PUBLIC include)
target_link_libraries(pp PRIVATE nncc_common)
+target_link_libraries(pp PUBLIC nncc_coverage)
if(NOT ENABLE_TEST)
return()
for j in range(len(input_details)):
input_detail = input_details[j]
- # Generate random input [-5, 5)
- input_data = np.array(10 * np.random.random_sample(input_detail["shape"]) - 5,
- input_detail["dtype"])
+ print(input_detail["dtype"])
+ if input_detail["dtype"] == np.bool_:
+ # Generate random bool [0, 1]
+ input_data = np.array(
+ np.random.random_integers(0, 1, input_detail["shape"]),
+ input_detail["dtype"])
+        elif input_detail["dtype"] == np.float32:
+            # Generate random input [-5, 5)
+            input_data = np.array(10 * np.random.random_sample(input_detail["shape"]) - 5,
+                                  input_detail["dtype"])
+        else:
+            # Guard against silently reusing stale data for unsupported dtypes
+            raise SystemExit("Unsupported input dtype: " + str(input_detail["dtype"]))
sample.create_dataset(str(j), data=input_data)
h5_file.close()
--input_data "${BIN_PATH}/${TESTCASE}.tflite.input.h5" \
--output_model "${BIN_PATH}/${TESTCASE}.out.circle"
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE CIRCLE OUTPUT"
+ continue
+ fi
+
+ # Run record-minmax with auto generated random input
+ "${RECORD_MINMAX_PATH}" \
+ --input_model "${TESTCASE_FILE}.circle" \
+ --output_model "${BIN_PATH}/${TESTCASE}.outr.circle"
+
if [[ $? -eq 0 ]]; then
touch "${PASSED_TAG}"
fi
target_link_libraries(record-minmax arser)
target_link_libraries(record-minmax safemain)
target_link_libraries(record-minmax luci_import)
+target_link_libraries(record-minmax luci_env)
target_link_libraries(record-minmax luci_export)
target_link_libraries(record-minmax luci_interpreter)
target_link_libraries(record-minmax vconone)
+target_link_libraries(record-minmax nncc_coverage)
install(TARGETS record-minmax DESTINATION bin)
return()
endif(NOT ENABLE_TEST)
+file(GLOB_RECURSE TESTS "tests/*.test.cpp")
+
nnas_find_package(GTest REQUIRED)
-GTest_AddTest(record_minmax_function_test "${CMAKE_CURRENT_SOURCE_DIR}/tests/RecordFunction.test.cpp")
+GTest_AddTest(record_minmax_function_test "${TESTS}")
target_include_directories(record_minmax_function_test PRIVATE include)
+target_link_libraries(record_minmax_function_test nncc_coverage)
#include <arser/arser.h>
#include <vconone/vconone.h>
+#include <luci/UserSettings.h>
+
void print_version(void)
{
std::cout << "record-minmax version " << vconone::get_string() << std::endl;
using namespace record_minmax;
arser::Arser arser(
- "Embedding min/max values of activations to the circle model for post-training quantization");
+ "Embedding min/max values of activations to the circle model for post-training quantization");
arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
arser.add_argument("--input_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Input model filepath");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(true)
+ .help("Input model filepath");
arser.add_argument("--input_data")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Input data filepath");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Input data filepath. If not given, record-minmax will run with randomly generated data. "
+ "Note that the random dataset does not represent inference workload, leading to poor "
+ "model accuracy.");
arser.add_argument("--output_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Output model filepath");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(true)
+ .help("Output model filepath");
arser.add_argument("--min_percentile")
- .nargs(1)
- .type(arser::DataType::FLOAT)
- .help("Record n'th percentile of min");
+ .nargs(1)
+ .type(arser::DataType::FLOAT)
+ .help("Record n'th percentile of min");
arser.add_argument("--max_percentile")
- .nargs(1)
- .type(arser::DataType::FLOAT)
- .help("Record n'th percentile of max");
+ .nargs(1)
+ .type(arser::DataType::FLOAT)
+ .help("Record n'th percentile of max");
arser.add_argument("--mode")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Record mode. percentile (default) or moving_average");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Record mode. percentile (default) or moving_average");
+
+ arser.add_argument("--generate_profile_data")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will turn on profiling data generation.");
try
{
return 255;
}
+ auto settings = luci::UserSettings::settings();
+
auto input_model_path = arser.get<std::string>("--input_model");
- auto input_data_path = arser.get<std::string>("--input_data");
auto output_model_path = arser.get<std::string>("--output_model");
// Default values
if (mode != "percentile" && mode != "moving_average")
throw std::runtime_error("Unsupported mode");
+ if (arser["--generate_profile_data"])
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
+
RecordMinMax rmm;
// Initialize interpreter and observer
rmm.initialize(input_model_path);
- // Profile min/max while executing the given input data
- rmm.profileData(mode, input_data_path, min_percentile, max_percentile);
+ if (arser["--input_data"])
+ {
+ auto input_data_path = arser.get<std::string>("--input_data");
+
+ // Profile min/max while executing the given input data
+ rmm.profileData(mode, input_data_path, min_percentile, max_percentile);
+ }
+ else
+ {
+ // Profile min/max while executing random input data
+ rmm.profileDataWithRandomInputs(mode, min_percentile, max_percentile);
+ }
// Save profiled values to the model
rmm.saveModel(output_model_path);
float percent_i = static_cast<float>(index) / static_cast<float>(copy.size() - 1);
float fraction =
- (percentile / 100.0 - percent_i) / ((index + 1.0) / (copy.size() - 1.0) - percent_i);
+ (percentile / 100.0 - percent_i) / ((index + 1.0) / (copy.size() - 1.0) - percent_i);
float res = copy[index] + fraction * (copy[index + 1] - copy[index]);
return res;
}
void profileData(const std::string &mode, const std::string &input_data_path,
float min_percentile, float max_percentile);
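+  // Profile min/max while executing randomly generated inputs (when no input data is given)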
+ void profileDataWithRandomInputs(const std::string &mode, float min_percentile,
+ float max_percentile);
+
void saveModel(const std::string &output_model_path);
private:
require("luci")
+require("luci-interpreter")
require("safemain")
require("arser")
require("vconone")
{
return DataType::S64;
}
- // Only support three datatypes for now
+ if (h5_type.getClass() == H5T_class_t::H5T_ENUM)
+ {
+ // We follow the numpy format
+ // In numpy 1.19.0, np.bool_ is saved as H5T_ENUM
+ // - (name, value) -> (FALSE, 0) and (TRUE, 1)
+ // - value dtype is H5T_STD_I8LE
+ // TODO Find a general way to recognize BOOL type
+ char name[10];
+ int8_t value[2] = {0, 1};
+ if (H5Tenum_nameof(h5_type.getId(), value, name, 10) < 0)
+ return DataType::Unknown;
+
+ if (std::string(name) != "FALSE")
+ return DataType::Unknown;
+
+ if (H5Tenum_nameof(h5_type.getId(), value + 1, name, 10) < 0)
+ return DataType::Unknown;
+
+ if (std::string(name) != "TRUE")
+ return DataType::Unknown;
+
+ return DataType::BOOL;
+ }
+ // TODO Support more datatypes
return DataType::Unknown;
}
case DataType::S64:
readTensorData(tensor, static_cast<int64_t *>(buffer));
break;
+ case DataType::BOOL:
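+      // BOOL is stored as one byte per element (see H5T_ENUM above), so read via uint8_t buffer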
+ readTensorData(tensor, static_cast<uint8_t *>(buffer));
+ break;
default:
throw std::runtime_error{"Unsupported data type for input data (.h5)"};
}
#include <luci/IR/CircleOpcode.h>
+#include <math.h>
+
using DataType = luci_interpreter::DataType;
namespace record_minmax
return;
}
+ if (node->dtype() == DataType::BOOL)
+ {
+ // Bool type tensor is not quantized
+ return;
+ }
+
// Only support recording of float32 values
if (tensor->element_type() != DataType::FLOAT32)
throw std::runtime_error("Tensor's data type is not float");
const auto num_elements = tensor->shape().num_elements();
std::vector<float> buf(data, data + num_elements);
- auto minmax = std::minmax_element(buf.begin(), buf.end());
- float min = *minmax.first;
- float max = *minmax.second;
+
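+  // Initialize to opposite extremes so that any non-NaN element updates both min and max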
+ float max = std::numeric_limits<float>::lowest();
+ float min = std::numeric_limits<float>::max();
+
+ bool all_nan = true;
+ for (auto number : buf)
+ {
+ if (isnan(number))
+ continue;
+
+ all_nan = false;
+
+ if (number > max)
+ max = number;
+
+ if (number < min)
+ min = number;
+ }
+
+ if (all_nan)
+ throw std::runtime_error("All values are NaN(Not a Number)");
_minmax_data.recordMinMax(node, min, max);
}
#include <numeric>
#include <stdexcept>
#include <iostream>
+#include <random>
using Shape = luci_interpreter::Shape;
using DataType = luci_interpreter::DataType;
namespace
{
+std::vector<uint8_t> genRandomBoolData(std::mt19937 &gen, uint32_t num_elements)
+{
+ std::uniform_int_distribution<> dist(0, 1);
+ std::vector<uint8_t> input_data(num_elements);
+
+ // Write random data
+ for (auto &iter : input_data)
+ iter = static_cast<uint8_t>(dist(gen));
+
+ return input_data;
+}
+
/**
* @brief getTensorSize will return size in bytes
*/
}
}
+void update_quantparam(record_minmax::MinMaxObserver *observer, const std::string &mode,
+ float min_percentile, float max_percentile)
+{
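+  // Reduce each node's min/max records to one (min, max) pair and attach it as CircleQuantParam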
+ auto minmax_map = observer->minMaxData()->getMap();
+ for (auto iter = minmax_map->begin(); iter != minmax_map->end(); ++iter)
+ {
+ auto node = iter->first;
+ auto minmax = iter->second;
+
+ float min{0.0f}, max{0.0f};
+ if (mode == "percentile")
+ {
+ min = record_minmax::getNthPercentile(minmax.min_vector, min_percentile);
+ max = record_minmax::getNthPercentile(minmax.max_vector, max_percentile);
+ }
+ else if (mode == "moving_average")
+ {
+ min = record_minmax::getMovingAverage(minmax.min_vector, 0.9, 16, true);
+ max = record_minmax::getMovingAverage(minmax.max_vector, 0.9, 16, false);
+ }
+ assert(mode == "percentile" || mode == "moving_average");
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->min.push_back(min);
+ quantparam->max.push_back(max);
+
+ assert(node->quantparam() == nullptr);
+
+ auto mutable_node = const_cast<luci::CircleNode *>(node);
+ mutable_node->quantparam(std::move(quantparam));
+ }
+}
+
} // namespace
namespace record_minmax
throw std::runtime_error("HDF5 error occurred.");
}
- auto minmax_map = _observer->minMaxData()->getMap();
- for (auto iter = minmax_map->begin(); iter != minmax_map->end(); ++iter)
+ update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
+}
+
+void RecordMinMax::profileDataWithRandomInputs(const std::string &mode, float min_percentile,
+ float max_percentile)
+{
+ // We use three randomly-generated records
+ const uint32_t num_records = 3;
+
+ const auto input_nodes = loco::input_nodes(_module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ std::random_device rd;
+ std::mt19937 gen(rd());
+ std::uniform_real_distribution<> dist(-5, 5);
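+  // Float inputs are drawn uniformly from [-5, 5); bool inputs are generated separately below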
+
+ for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
{
- auto node = iter->first;
- auto minmax = iter->second;
+ std::cout << "Recording " << record_idx << "'th data" << std::endl;
- float min{0.0f}, max{0.0f};
- if (mode == "percentile")
+ for (int32_t input_idx = 0; input_idx < num_inputs; input_idx++)
{
- min = getNthPercentile(minmax.min_vector, min_percentile);
- max = getNthPercentile(minmax.max_vector, max_percentile);
- }
- else if (mode == "moving_average")
- {
- min = getMovingAverage(minmax.min_vector, 0.9, 16, true);
- max = getMovingAverage(minmax.max_vector, 0.9, 16, false);
- }
- assert(mode == "percentile" || mode == "moving_average");
- auto quantparam = std::make_unique<luci::CircleQuantParam>();
- quantparam->min.push_back(min);
- quantparam->max.push_back(max);
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+ uint32_t num_elements = 1;
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+ if (!input_node->dim(i).known())
+ throw std::runtime_error("Input dimension must be known");
- assert(node->quantparam() == nullptr);
+ num_elements *= input_node->dim(i).value();
+ }
- auto mutable_node = const_cast<luci::CircleNode *>(node);
- mutable_node->quantparam(std::move(quantparam));
+ if (num_elements == 0)
+ throw std::runtime_error("Only support non-zero sized inputs");
+
+ // TODO Support more input data types
+ assert(input_node->dtype() == loco::DataType::FLOAT32 ||
+ input_node->dtype() == loco::DataType::BOOL);
+
+ if (input_node->dtype() == DataType::FLOAT32)
+ // clang-format off
+ {
+ std::vector<float> input_data(num_elements);
+
+ // Write random data
+ for (auto &iter : input_data)
+ iter = static_cast<float>(dist(gen));
+
+      // TODO: Input data is copied twice (generator -> buffer (input_data) -> interpreter inputs)
+      // We can reduce the copy by writing the random data directly to the interpreter inputs
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(float));
+ }
+ // clang-format on
+ else if (input_node->dtype() == DataType::BOOL)
+ {
+ auto input_data = genRandomBoolData(gen, num_elements);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(uint8_t));
+ }
+ }
+
+ _interpreter->interpret();
}
+
+ std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
+
+ update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
}
void RecordMinMax::saveModel(const std::string &output_model_path)
SUCCEED();
}
+TEST(GetMovingAverageTest, Simple)
+{
+ std::vector<float> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+ EXPECT_NE(0, getMovingAverage(input, 0.5, 4, true));
+ EXPECT_NE(0, getMovingAverage(input, 0.5, 4, false));
+}
+
} // namespace record_minmax
float _stddev;
};
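+// Generates int16 samples from a normal distribution N(mean, stddev), clamped to the int16 range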
+class GaussianInt16DataChef final : public DataChef
+{
+public:
+ GaussianInt16DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+private:
+ float _mean;
+ float _stddev;
+};
+
class GaussianUint8DataChef final : public DataChef
{
public:
std::unique_ptr<DataChef> create(const Arguments &args) const;
};
+struct GaussianInt16DataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const;
+};
+
struct GaussianUint8DataChefFactory : public DataChefFactory
{
std::unique_ptr<DataChef> create(const Arguments &args) const;
DATA_CHEF(FLOAT32, constant, ConstantDataChefFactory<float>)
DATA_CHEF(BOOL, constant, ConstantDataChefFactory<bool>)
DATA_CHEF(UINT8, constant, ConstantDataChefFactory<uint8_t>)
+DATA_CHEF(INT16, constant, ConstantDataChefFactory<int16_t>)
DATA_CHEF(INT32, constant, ConstantDataChefFactory<int32_t>)
DATA_CHEF(INT64, constant, ConstantDataChefFactory<int64_t>)
DATA_CHEF(INT64, explicit, ExplicitDataChefFactory<int64_t>)
DATA_CHEF(INT32, explicit, ExplicitDataChefFactory<int32_t>)
+DATA_CHEF(INT16, explicit, ExplicitDataChefFactory<int16_t>)
DATA_CHEF(UINT8, explicit, ExplicitDataChefFactory<uint8_t>)
DATA_CHEF(BOOL, explicit, ExplicitDataChefFactory<bool>)
DATA_CHEF(FLOAT32, explicit, ExplicitDataChefFactory<float>)
DATA_CHEF(FLOAT32, gaussian, GaussianFloat32DataChefFactory)
DATA_CHEF(INT32, gaussian, GaussianInt32DataChefFactory)
+DATA_CHEF(INT16, gaussian, GaussianInt16DataChefFactory)
DATA_CHEF(UINT8, gaussian, GaussianUint8DataChefFactory)
namespace souschef
{
-std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const
+template <typename T>
+static std::vector<uint8_t> generate_gaussian(int32_t count, float mean, float stddev,
+ std::minstd_rand::result_type seed)
{
- // TODO Support seed value override
- auto seed = std::chrono::system_clock::now().time_since_epoch().count();
-
std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
- std::normal_distribution<float> dist{_mean, _stddev};
+ std::normal_distribution<float> dist{mean, stddev};
std::vector<uint8_t> res;
+  constexpr float max_cap = std::numeric_limits<T>::max();
+  // lowest(), not min(): for floating-point T, min() is the smallest positive value
+  constexpr float min_cap = std::numeric_limits<T>::lowest();
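+  // Clamp each sample into T's range before casting so out-of-range draws saturate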
for (uint32_t n = 0; n < count; ++n)
{
- auto const value = dist(rand);
+ float raw_value = dist(rand);
+ const float capped_value = std::max(min_cap, std::min(max_cap, raw_value));
+ auto const value = static_cast<T>(capped_value);
auto const arr = reinterpret_cast<const uint8_t *>(&value);
- for (uint32_t b = 0; b < sizeof(float); ++b)
+ for (uint32_t b = 0; b < sizeof(T); ++b)
{
res.emplace_back(arr[b]);
}
return res;
}
-std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const
+template <typename T>
+static std::vector<uint8_t> generate_gaussian(int32_t count, float mean, float stddev)
{
- // TODO Support seed value override
- auto seed = std::chrono::system_clock::now().time_since_epoch().count();
+ auto time_stamp = std::chrono::system_clock::now().time_since_epoch().count();
- std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
- std::normal_distribution<float> dist{_mean, _stddev};
+ // Note this is implementation defined, change if needed.
+ auto seed = static_cast<std::minstd_rand::result_type>(time_stamp);
- std::vector<uint8_t> res;
+ return generate_gaussian<T>(count, mean, stddev, seed);
+}
- for (uint32_t n = 0; n < count; ++n)
- {
- auto const value = static_cast<int32_t>(dist(rand));
- auto const arr = reinterpret_cast<const uint8_t *>(&value);
+std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const
+{
+ return generate_gaussian<float>(count, _mean, _stddev);
+}
- for (uint32_t b = 0; b < sizeof(int32_t); ++b)
- {
- res.emplace_back(arr[b]);
- }
- }
+std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const
+{
+ return generate_gaussian<int32_t>(count, _mean, _stddev);
+}
- return res;
+std::vector<uint8_t> GaussianInt16DataChef::generate(int32_t count) const
+{
+ return generate_gaussian<int16_t>(count, _mean, _stddev);
}
std::vector<uint8_t> GaussianUint8DataChef::generate(int32_t count) const
{
- // TODO Support seed value override
- auto seed = std::chrono::system_clock::now().time_since_epoch().count();
-
- std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
- std::normal_distribution<float> dist{_mean, _stddev};
-
- std::vector<uint8_t> res;
-
- for (uint32_t n = 0; n < count; ++n)
- {
- auto const value = static_cast<uint8_t>(dist(rand)); // uint8_t for data type
- auto const arr = reinterpret_cast<const uint8_t *>(&value); // uint8_t for byte streaming
-
- for (uint32_t b = 0; b < sizeof(uint8_t); ++b)
- {
- res.emplace_back(arr[b]);
- }
- }
-
- return res;
+ return generate_gaussian<uint8_t>(count, _mean, _stddev);
}
std::unique_ptr<DataChef> GaussianFloat32DataChefFactory::create(const Arguments &args) const
return std::unique_ptr<DataChef>{new GaussianInt32DataChef{mean, stddev}};
}
+std::unique_ptr<DataChef> GaussianInt16DataChefFactory::create(const Arguments &args) const
+{
+ if (args.count() != 2)
+ {
+ throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"};
+ }
+
+ auto const mean = to_number<float>(args.value(0));
+ auto const stddev = to_number<float>(args.value(1));
+
+ return std::unique_ptr<DataChef>{new GaussianInt16DataChef{mean, stddev}};
+}
+
std::unique_ptr<DataChef> GaussianUint8DataChefFactory::create(const Arguments &args) const
{
if (args.count() != 2)
#include <cassert>
#include <limits>
+#include <stdexcept>
namespace souschef
{
template <> float to_number(const std::string &s) { return std::stof(s); }
template <> int to_number(const std::string &s) { return std::stoi(s); }
+template <> int16_t to_number(const std::string &s)
+{
+  // There is no standard function to parse int16_t or short int
+  // This function mimics the behavior of stoi, stol and stoll
+ int res = std::stol(s);
+  // The standard does not specify the error message string, so this one is arbitrary
+ if (res < std::numeric_limits<int16_t>::min() || res > std::numeric_limits<int16_t>::max())
+ {
+ throw std::out_of_range("to_number<int16_t>");
+ }
+ return res;
+}
template <> int64_t to_number(const std::string &s) { return std::stoll(s); }
template <> uint8_t to_number(const std::string &s)
{
+++ /dev/null
-file(GLOB_RECURSE TESTS "src/*.test.cpp")
-
-add_library(stdex INTERFACE)
-target_include_directories(stdex INTERFACE include)
-
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-# Google Test is mandatory for test
-nnas_find_package(GTest REQUIRED)
-
-add_executable(stdex_test ${TESTS})
-target_link_libraries(stdex_test stdex)
-target_link_libraries(stdex_test gtest_main)
-add_test(stdex_test stdex_test)
+++ /dev/null
-# stdex
-
-`stdex` is an extension over standard C++ libraries.
-
-# How to use
-
-Please read each header files.
-
-One example of `stdex::make_unique(..)` in `compiler/stdex/Memory.h` is as follows:
-
-```cpp
-#include <stdex/Memory.h>
-
-using stdex::make_unique;
-
-class A { ... };
-
-...
-
-std::unique_ptr<A> a = make_unique<A>(); // Note: std::make_unique is not supported in C++ 11
-
-```
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __STDEX_MEMORY_H__
-#define __STDEX_MEMORY_H__
-
-#include <memory>
-
-namespace stdex
-{
-
-using std::make_unique;
-
-} // namespace stdex
-
-#endif // __STDEX_MEMORY_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __STDEX_QUEUE_H__
-#define __STDEX_QUEUE_H__
-
-#include <queue>
-
-namespace stdex
-{
-
-/**
- * @brief Take the front (= first) element from the queue
- * @note The queue SHOULD have at least one element
- */
-template <typename T> T take(std::queue<T> &q)
-{
- auto res = q.front();
- q.pop();
- return res;
-}
-
-} // namespace stdex
-
-#endif // __STDEX_QUEUE_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __STDEX_SET_H__
-#define __STDEX_SET_H__
-
-#include <set>
-
-template <typename T> bool operator==(const std::set<T> &lhs, const std::set<T> &rhs)
-{
- if (rhs.size() != lhs.size())
- {
- return false;
- }
-
- for (const auto &element : lhs)
- {
- if (rhs.find(element) == rhs.end())
- {
- return false;
- }
- }
-
- return true;
-}
-
-template <typename T> std::set<T> operator-(const std::set<T> &lhs, const std::set<T> &rhs)
-{
- std::set<T> res;
-
- for (const auto &element : lhs)
- {
- if (rhs.find(element) == rhs.end())
- {
- res.insert(element);
- }
- }
-
- return res;
-}
-
-#endif // __STDEX_SET_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "stdex/Memory.h"
-
-#include <gtest/gtest.h>
-
-namespace
-{
-
-struct Stat
-{
- unsigned allocated = 0;
- unsigned freed = 0;
-};
-
-struct Counter
-{
-public:
- Counter(Stat *stat) : _stat{stat} { _stat->allocated += 1; }
-
-public:
- ~Counter() { _stat->freed += 1; }
-
-private:
- Stat *_stat;
-};
-
-} // namespace
-
-TEST(MemoryTest, make_unique)
-{
- Stat stat;
-
- ASSERT_EQ(stat.allocated, 0);
- ASSERT_EQ(stat.freed, 0);
-
- auto o = stdex::make_unique<::Counter>(&stat);
-
- ASSERT_EQ(stat.allocated, 1);
- ASSERT_EQ(stat.freed, 0);
-
- o.reset();
-
- ASSERT_EQ(stat.allocated, 1);
- ASSERT_EQ(stat.freed, 1);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "stdex/Queue.h"
-
-#include <gtest/gtest.h>
-
-TEST(QueueTest, take)
-{
- std::queue<int> q;
-
- q.emplace(3);
- q.emplace(4);
- q.emplace(5);
-
- ASSERT_EQ(stdex::take(q), 3);
- ASSERT_EQ(stdex::take(q), 4);
- ASSERT_EQ(stdex::take(q), 5);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "stdex/Set.h"
-
-#include <gtest/gtest.h>
-
-TEST(SET, operator_eq)
-{
- ASSERT_TRUE(std::set<int>({1, 2, 3}) == std::set<int>({1, 2, 3}));
- ASSERT_FALSE(std::set<int>({1, 3}) == std::set<int>({1, 2, 3}));
-}
-
-TEST(SET, operator_diff)
-{
- const std::set<int> lhs{1, 2, 3};
- const std::set<int> rhs{2, 4};
-
- auto res = lhs - rhs;
-
- ASSERT_EQ(res.size(), 2);
- ASSERT_NE(res.find(1), res.end());
- ASSERT_NE(res.find(3), res.end());
-}
target_link_libraries(tf2circle PRIVATE exo)
target_link_libraries(tf2circle PRIVATE locop)
target_link_libraries(tf2circle PRIVATE hermes_std)
-target_link_libraries(tf2circle PRIVATE stdex)
target_link_libraries(tf2circle PRIVATE angkor cwrap)
target_link_libraries(tf2circle PRIVATE tf2circle_customop_info_proto)
-require("stdex")
require("hermes-std")
require("moco-tf")
require("exo")
#include <hermes/ConsoleReporter.h>
#include <hermes/EnvConfig.h>
-#include <stdex/Memory.h>
-
#include <cassert>
-
+#include <memory>
#include <iostream>
#include <stdexcept>
#include <string>
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("TF2CIRCLE_Log"));
}
return ctx;
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the moco-tf loggers via TF2CIRCLE_Log_Frontend
- moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log_Frontend"));
+ moco::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2CIRCLE_Log_Frontend"));
// This line allows users to control all the exo-circle loggers via TF2CIRCLE_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2CIRCLE_Log_Backend"));
LOGGER(l);
target_link_libraries(tf2nnpkg PRIVATE exo)
target_link_libraries(tf2nnpkg PRIVATE locop)
target_link_libraries(tf2nnpkg PRIVATE hermes_std)
-target_link_libraries(tf2nnpkg PRIVATE stdex)
target_link_libraries(tf2nnpkg PRIVATE angkor cwrap)
install(TARGETS tf2nnpkg DESTINATION bin)
-require("stdex")
require("hermes-std")
require("moco-tf")
require("exo")
#include <hermes/ConsoleReporter.h>
#include <hermes/EnvConfig.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <iostream>
#include <fstream>
#include <functional>
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("TF2NNPKG_Log"));
}
return ctx;
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the moco-tf loggers via TF2NNPKG_Log_Frontend
- moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log_Frontend"));
+ moco::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2NNPKG_Log_Frontend"));
// This line allows users to control all the exo-circle loggers via TF2NNPKG_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2NNPKG_Log_Backend"));
LOGGER(l);
target_link_libraries(tf2tflite PRIVATE exo)
target_link_libraries(tf2tflite PRIVATE locop)
target_link_libraries(tf2tflite PRIVATE hermes_std)
-target_link_libraries(tf2tflite PRIVATE stdex)
target_link_libraries(tf2tflite PRIVATE angkor cwrap)
target_link_libraries(tf2tflite PRIVATE tf2tflite_customop_info_proto)
install(TARGETS tf2tflite DESTINATION bin)
-require("stdex")
require("hermes-std")
require("moco-tf")
require("exo")
#include <hermes/ConsoleReporter.h>
#include <hermes/EnvConfig.h>
-#include <stdex/Memory.h>
-
#include <cassert>
-
+#include <memory>
#include <iostream>
#include <stdexcept>
#include <string>
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("TF2TFLITE_Log"));
}
return ctx;
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the moco-tf loggers via TF2TFLITE_Log_Frontend
- moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log_Frontend"));
+ moco::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2TFLITE_Log_Frontend"));
// This line allows users to control all the exo-tflite loggers via TF2TFLITE_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2TFLITE_Log_Backend"));
LOGGER(l);
raise ValueError("--input_arrays must be provided")
if not flags.output_arrays:
raise ValueError("--output_arrays must be provided")
+ input_shapes = []
+ if flags.input_shapes:
+ input_shapes = [
+ _parse_array(shape, type_fn=int)
+ for shape in flags.input_shapes.split(":")
+ ]
+ if len(input_shapes) != len(_parse_array(flags.input_arrays)):
+ raise ValueError(
+ "--input_shapes and --input_arrays must have the same length")
file_content = open(flags.input_path, 'rb').read()
try:
graph_def = tf.compat.v1.GraphDef()
_str + ":0" if len(_str.split(":")) == 1 else _str
for _str in _parse_array(flags.output_arrays)
])
+ for i in range(len(input_shapes)):
+ wrap_func.inputs[i].set_shape(input_shapes[i])
converter = tf.lite.TFLiteConverter.from_concrete_functions([wrap_func])
if flags.model_format == "saved_model":
target_include_directories(tfinfo_v2 PUBLIC include)
target_link_libraries(tfinfo_v2 PRIVATE tfinfo_v2_proto)
target_link_libraries(tfinfo_v2 PRIVATE oops)
-target_link_libraries(tfinfo_v2 PRIVATE stdex)
if(NOT ENABLE_TEST)
return()
}
TensorSignature(const Kind kind, const std::string &name, const ShapeHint &shape_hint)
- : TensorSignature(kind, name)
+ : TensorSignature(kind, name)
{
_shape_hint = shape_hint;
}
require("oops")
-require("stdex")
name : "relu:0"
}
),
- // clang-format on
+ // clang-format on
};
} // namespace
input, a:0, TF_FLOAT, [2, 3 ,4]
output, b:0, TF_FLOAT, [2, 3 ,4]
)",
- // clang-format on
+ // clang-format on
};
} // namespace
#include "tfinfo-v2/TensorSignature.h"
#include <oops/UserExn.h>
-#include <stdex/Memory.h>
#include <tfinfo-v2.pb.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>
+#include <memory>
#include <fstream>
#include <fcntl.h>
auto name = input_def.name();
validate_tensor_name(name, path);
- auto tensor = stdex::make_unique<tfinfo::v2::TensorSignature>(
- tfinfo::v2::TensorSignature::Kind::Input, name);
+ auto tensor = std::make_unique<tfinfo::v2::TensorSignature>(
+ tfinfo::v2::TensorSignature::Kind::Input, name);
// when there is dim attribute for unknown shape
if (input_def.dim_size() > 0)
auto name = info_def.output().Get(i).name();
validate_tensor_name(name, path);
- auto tensor = stdex::make_unique<tfinfo::v2::TensorSignature>(
- tfinfo::v2::TensorSignature::Kind::Output, name);
+ auto tensor = std::make_unique<tfinfo::v2::TensorSignature>(
+ tfinfo::v2::TensorSignature::Kind::Output, name);
tensors.emplace_back(std::move(tensor));
}
}
add_library(tfinfo STATIC ${SOURCES})
set_target_properties(tfinfo PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(tfinfo PUBLIC include)
-target_link_libraries(tfinfo stdex angkor oops)
+target_link_libraries(tfinfo angkor oops)
# TODO Remove "nnkit_support_tftestinfo" later
add_library(nnkit_support_tftestinfo ALIAS tfinfo)
ParsedTensor(const Kind kind, const std::string &name, const DataType &dtype,
const std::vector<int32_t> &shape)
- : _kind(kind), _dtype(dtype)
+ : _kind(kind), _dtype(dtype)
{
_tensor_name.assign(name);
_shape.dim(rank) = shape.at(rank);
}
- ~ParsedTensor() { /* empty */}
+ ~ParsedTensor()
+ { /* empty */
+ }
public:
Kind kind() const { return _kind; }
-require("stdex")
require("angkor")
require("oops")
#include "Compat.h"
#include <oops/UserExn.h>
-#include <stdex/Memory.h>
#include <nncc/core/ADT/tensor/Shape.h>
#include <cctype>
shape.emplace_back(std::stoi(dim));
}
- return stdex::make_unique<ParsedTensor>(kind, name, dtype, shape);
+ return std::make_unique<ParsedTensor>(kind, name, dtype, shape);
}
#undef CHECK_NOT_NULL
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(tfkit ${SOURCES})
-target_link_libraries(tfkit PRIVATE stdex)
target_link_libraries(tfkit PRIVATE cli)
target_link_libraries(tfkit PRIVATE mio_tf)
target_link_libraries(tfkit PRIVATE nncc_common)
#include "ConvertCommand.hpp"
#include "Support.hpp"
-#include <stdex/Memory.h>
-
#include <tensorflow/core/framework/graph.pb.h>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/text_format.h>
#include <google/protobuf/util/json_util.h>
+#include <memory>
#include <cassert>
#include <map>
#include <string>
std::map<std::string, std::unique_ptr<Importer>> importers;
- importers["pb"] = stdex::make_unique<ImporterImpl<DataFormat::PBBIN>>();
- importers["pbtxt"] = stdex::make_unique<ImporterImpl<DataFormat::PBTXT>>();
+ importers["pb"] = std::make_unique<ImporterImpl<DataFormat::PBBIN>>();
+ importers["pbtxt"] = std::make_unique<ImporterImpl<DataFormat::PBTXT>>();
std::map<std::string, std::unique_ptr<Exporter>> exporters;
- exporters["json"] = stdex::make_unique<ExporterImpl<DataFormat::JSON>>();
+ exporters["json"] = std::make_unique<ExporterImpl<DataFormat::JSON>>();
auto importer = importers.at(input_format).get();
auto exporter = exporters.at(output_format).get();
#include "ConvertCommand.hpp"
#include <cli/App.h>
-#include <stdex/Memory.h>
+
+#include <memory>
int main(int argc, char **argv)
{
cli::App app{argv[0]};
- app.insert("encode", stdex::make_unique<tfkit::EncodeCommand>());
- app.insert("decode", stdex::make_unique<tfkit::DecodeCommand>());
- app.insert("unpack", stdex::make_unique<tfkit::UnpackCommand>());
- app.insert("pack", stdex::make_unique<tfkit::PackCommand>());
- app.insert("convert", stdex::make_unique<tfkit::ConvertCommand>());
+ app.insert("encode", std::make_unique<tfkit::EncodeCommand>());
+ app.insert("decode", std::make_unique<tfkit::DecodeCommand>());
+ app.insert("unpack", std::make_unique<tfkit::UnpackCommand>());
+ app.insert("pack", std::make_unique<tfkit::PackCommand>());
+ app.insert("convert", std::make_unique<tfkit::ConvertCommand>());
return app.run(argc - 1, argv + 1);
}
}
input_tensor->set_tensor_content(std::string(
- reinterpret_cast<const char *>(tensor_content.data()), sizeof(float) * input_flat_size));
+ reinterpret_cast<const char *>(tensor_content.data()), sizeof(float) * input_flat_size));
input_tensor->clear_float_val();
}
}
input_tensor->set_tensor_content(std::string(
- reinterpret_cast<const char *>(tensor_content.data()), sizeof(int32_t) * input_flat_size));
+ reinterpret_cast<const char *>(tensor_content.data()), sizeof(int32_t) * input_flat_size));
input_tensor->clear_int_val();
}
#include "Support.hpp"
-#include <stdex/Memory.h>
-
#include <tensorflow/core/framework/graph.pb.h>
+#include <memory>
#include <cassert>
#include <fstream>
#include <stdexcept>
return nullptr;
}
- auto stream = stdex::make_unique<T>(path.c_str(), mode);
+ auto stream = std::make_unique<T>(path.c_str(), mode);
if (!stream->is_open())
{
throw std::runtime_error{"ERROR: Failed to open " + path};
std::unique_ptr<IOConfiguration> make_ioconfig(const CmdArguments &cmdargs)
{
- auto iocfg = stdex::make_unique<IOConfiguration>();
+ auto iocfg = std::make_unique<IOConfiguration>();
auto in = open_fstream<std::ifstream>(cmdargs.get_or(0, "-"), std::ios::in | std::ios::binary);
iocfg->in(std::move(in));
public:
CmdArguments() = delete;
CmdArguments(int argc, const char *const *argv)
- : _argc(static_cast<unsigned int>(argc)), _argv{argv}
+ : _argc(static_cast<unsigned int>(argc)), _argv{argv}
{
}
input_tensor->clear_float_val();
const float *tensor_content =
- reinterpret_cast<const float *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const float *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_float_val(tensor_content[i]);
input_tensor->clear_int_val();
const int32_t *tensor_content =
- reinterpret_cast<const int32_t *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const int32_t *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_int_val(tensor_content[i]);
input_tensor->clear_int_val();
const int8_t *tensor_content =
- reinterpret_cast<const int8_t *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const int8_t *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_int_val(tensor_content[i]);
input_tensor->clear_bool_val();
const bool *tensor_content =
- reinterpret_cast<const bool *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const bool *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_bool_val(tensor_content[i]);
"Lite model files"};
arser.add_argument("--operators").nargs(0).help("Dump operators in tflite file");
arser.add_argument("--conv2d_weight")
- .nargs(0)
- .help("Dump Conv2D series weight operators in tflite file");
+ .nargs(0)
+ .help("Dump Conv2D series weight operators in tflite file");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in tflite file");
arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to inspect");
return tflite::TensorType_INT64;
case tflchef::BOOL:
return tflite::TensorType_BOOL;
+ case tflchef::INT16:
+ return tflite::TensorType_INT16;
default:
break;
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BroadcastTo.h"
+
+#include "flatbuffers/flexbuffers.h"
+
+flatbuffers::Offset<void> BroadcastToChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+BroadcastToChef::custom_value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.type() == "BroadcastTo");
+
+ /**
+ * REGISTER_OP("BroadcastTo")
+ .Input("input: T")
+ .Input("shape: Tidx")
+ .Output("output: T")
+ .Attr("T: type")
+ .Attr("Tidx: {int32, int64} = DT_INT32")
+ .SetShapeFn([](InferenceContext* c)
+ */
+
+ auto flex_buffers = std::make_unique<flexbuffers::Builder>();
+ size_t map_start = flex_buffers->StartMap();
+
+ // TODO Support more data types
+ flex_buffers->Int("T", tflite::TensorType_FLOAT32);
+ flex_buffers->Int("Tidx", tflite::TensorType_INT32);
+
+ flex_buffers->EndMap(map_start);
+ flex_buffers->Finish();
+
+ auto circle_custom_options = fbb.CreateVector(flex_buffers->GetBuffer());
+ return circle_custom_options;
+}
+
+std::unique_ptr<OpChef> BroadcastToChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new BroadcastToChef{operation}};
+}
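
The custom options produced above form a flexbuffer map with the keys "T" and "Tidx". As a reference for readers, a minimal sketch of reading such a buffer back with the flexbuffers API follows; the helper name and the `custom_options` buffer are hypothetical and not part of this patch:

#include <cstdint>
#include <vector>

#include "flatbuffers/flexbuffers.h"

// Hypothetical helper: decode the {T, Tidx} map written by BroadcastToChef::custom_value
// from the raw custom_options buffer attached to a CUSTOM operator.
inline void read_broadcast_to_custom_options(const std::vector<uint8_t> &custom_options)
{
  auto map = flexbuffers::GetRoot(custom_options.data(), custom_options.size()).AsMap();
  auto t = map["T"].AsInt32();       // expected: tflite::TensorType_FLOAT32
  auto tidx = map["Tidx"].AsInt32(); // expected: tflite::TensorType_INT32
  (void)t;
  (void)tidx;
}
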
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_BROADCASTTO_H__
+#define __OP_BROADCASTTO_H__
+
+#include "OpChef.h"
+
+class BroadcastToChef final : public OpChef
+{
+public:
+ explicit BroadcastToChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_CUSTOM; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+ custom_value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct BroadcastToChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_BROADCASTTO_H__
{
public:
GeneratedModelImpl(std::unique_ptr<flatbuffers::FlatBufferBuilder> &&builder)
- : _builder{std::move(builder)}
+ : _builder{std::move(builder)}
{
// DO NOTHING
}
static DataChefRegistry fp32;
static DataChefRegistry u8;
static DataChefRegistry boolean;
+ static DataChefRegistry s16;
switch (type)
{
return u8;
case tflchef::BOOL:
return boolean;
+ case tflchef::INT16:
+ return s16;
default:
break;
}
std::vector<flatbuffers::Offset<::tflite::SubGraph>> &subgraph_vec;
std::unique_ptr<flatbuffers::FlatBufferBuilder> &flatbuffer_builder;
std::map<tflite::BuiltinOperator, int32_t> &builtin_code_map;
+ std::vector<std::string> &custom_code_vec;
std::string noname;
};
std::vector<flatbuffers::Offset<::tflite::SubGraph>> &subgraph_vec = cp.subgraph_vec;
std::unique_ptr<flatbuffers::FlatBufferBuilder> &flatbuffer_builder = cp.flatbuffer_builder;
std::map<tflite::BuiltinOperator, int32_t> &builtin_code_map = cp.builtin_code_map;
+ std::vector<std::string> &custom_code_vec = cp.custom_code_vec;
// Operand-related
std::vector<flatbuffers::Offset<::tflite::Tensor>> tensor_vec;
{
// Create array segments
auto tflite_array_segments =
- as_tflite_sparse_index_vec(*flatbuffer_builder, dm.array_segments());
+ as_tflite_sparse_index_vec(*flatbuffer_builder, dm.array_segments());
// Create array indices
auto tflite_array_indices =
- as_tflite_sparse_index_vec(*flatbuffer_builder, dm.array_indices());
+ as_tflite_sparse_index_vec(*flatbuffer_builder, dm.array_indices());
auto tflite_dim_metadata_builder = tflite::DimensionMetadataBuilder{*flatbuffer_builder};
tflite_dim_metadata_builder.add_format(as_tflite_dimensiontype(dm.format()));
tflite_dim_metadata_builder.add_dense_size(dm.dense_size());
tflite_dim_metadata_builder.add_array_segments(tflite_array_segments);
tflite_dim_metadata_builder.add_array_segments_type(
- as_tflite_sparse_idx_vec_type(dm.array_segments().type()));
+ as_tflite_sparse_idx_vec_type(dm.array_segments().type()));
tflite_dim_metadata_builder.add_array_indices(tflite_array_indices);
tflite_dim_metadata_builder.add_array_indices_type(
- as_tflite_sparse_idx_vec_type(dm.array_indices().type()));
+ as_tflite_sparse_idx_vec_type(dm.array_indices().type()));
auto tflite_dim_metadata = tflite_dim_metadata_builder.Finish();
dim_metadata_vec.emplace_back(tflite_dim_metadata);
}
// Create Operator
tflite::OperatorBuilder op_builder{*flatbuffer_builder};
- // Get operator code index from builtin_code_set with assumption, order of
- // builtin_code_set is same as that of code_vec
+ // Note that opcode_index is an index into the operator_codes vector.
+ // operator_codes consists of builtin codes followed by custom codes, appended in that order.
+ uint32_t opcode_index = 0;
auto op_it = builtin_code_map.find(op_chef->code());
- assert(op_it != builtin_code_map.end());
- uint32_t opcode_index = std::distance(builtin_code_map.begin(), op_it);
+ // builtin operator
+ if (op_it != builtin_code_map.end())
+ {
+ opcode_index = std::distance(builtin_code_map.begin(), op_it);
+ }
+ // custom operator
+ else
+ {
+ auto op_it = std::find(custom_code_vec.begin(), custom_code_vec.end(), operation.type());
+ assert(op_it != custom_code_vec.end());
+ opcode_index = builtin_code_map.size();
+ opcode_index += std::distance(custom_code_vec.begin(), op_it);
+ }
op_builder.add_opcode_index(opcode_index);
op_builder.add_inputs(inputs);
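
To make the index bookkeeping above concrete: operator_codes is laid out as all builtin codes first (in builtin_code_map iteration order) and then all custom codes (in custom_code_vec order). A stand-alone sketch of the same lookup, with hypothetical names and simplified map types, is:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Sketch only: resolves an opcode_index the same way as the generator above, assuming
// operator_codes was emitted as <all builtin codes><all custom codes>.
inline uint32_t resolve_opcode_index(const std::map<int32_t, int32_t> &builtin_code_map,
                                     const std::vector<std::string> &custom_code_vec,
                                     int32_t builtin_code, const std::string &custom_type)
{
  auto b_it = builtin_code_map.find(builtin_code);
  if (b_it != builtin_code_map.end()) // builtin operator
    return std::distance(builtin_code_map.begin(), b_it);

  auto c_it = std::find(custom_code_vec.begin(), custom_code_vec.end(), custom_type); // custom
  assert(c_it != custom_code_vec.end());
  return builtin_code_map.size() + std::distance(custom_code_vec.begin(), c_it);
}

For example, with three builtin codes and custom_code_vec = {"BroadcastTo", "MatMul"}, a custom "MatMul" operation would get opcode_index 3 + 1 = 4.
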
// Initialize Data Chef Registry
#define DATA_CHEF(TYPE, NAME, FACTORY_CLASS) \
data_chef_registry(::tflchef::TYPE) \
- .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
+ .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
#include <souschef/DataChef.def>
#undef DATA_CHEF
// Create FlatBufferBuilder
//
auto flatbuffer_builder =
- std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
+ std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
// Operand-related
std::vector<flatbuffers::Offset<::tflite::Buffer>> buffer_vec;
// Create OperatorCode with Custom Operator
std::set<std::string> custom_code_set = gather_customcode_set(model_recipe);
- if (custom_code_set.size() &&
- builtin_code_map.find(tflite::BuiltinOperator_CUSTOM) == builtin_code_map.end())
- builtin_code_map[tflite::BuiltinOperator_CUSTOM] = 1;
+ std::vector<std::string> custom_code_vec{custom_code_set.begin(), custom_code_set.end()};
- for (auto opcode : custom_code_set)
+ for (auto opcode : custom_code_vec)
{
auto custom_code = flatbuffer_builder->CreateString(opcode);
tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder};
//
// Create Main graph
//
- CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder, builtin_code_map, "main"};
+ CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder,
+ builtin_code_map, custom_code_vec, "main"};
cook_graph<::tflchef::ModelRecipe>(model_recipe, cp);
std::ostringstream stringStream;
stringStream << "sub_" << (g + 1);
- CookParams cp{buffer_vec, code_vec, subgraph_vec,
- flatbuffer_builder, builtin_code_map, stringStream.str()};
+ CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder,
+ builtin_code_map, custom_code_vec, stringStream.str()};
cook_graph<::tflchef::Graph>(graph, cp);
}
// Return "GenerateModel"
return GeneratedModel{
- std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
+ std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
}
} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BidirectionalSequenceLSTM.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void>
+BidirectionalSequenceLSTMChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_bidirectional_sequence_lstm_options());
+
+ tflite::BidirectionalSequenceLSTMOptionsBuilder options_builder(fbb);
+ options_builder.add_fused_activation_function(
+ as_tflite_activation(operation.bidirectional_sequence_lstm_options().activation()));
+ options_builder.add_cell_clip(operation.bidirectional_sequence_lstm_options().cell_clip());
+ options_builder.add_proj_clip(operation.bidirectional_sequence_lstm_options().proj_clip());
+ options_builder.add_time_major(operation.bidirectional_sequence_lstm_options().time_major());
+ options_builder.add_asymmetric_quantize_inputs(
+ operation.bidirectional_sequence_lstm_options().asymmetric_quantize_inputs());
+ options_builder.add_merge_outputs(
+ operation.bidirectional_sequence_lstm_options().merge_outputs());
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef>
+BidirectionalSequenceLSTMChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new BidirectionalSequenceLSTMChef{operation}};
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+#define __OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "OpChef.h"
+
+class BidirectionalSequenceLSTMChef final : public OpChef
+{
+public:
+ explicit BidirectionalSequenceLSTMChef(const tflchef::Operation *operation)
+ : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override
+ {
+ return tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM;
+ }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct BidirectionalSequenceLSTMChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_BIDIRECTIONALSEQUENCE_LSTM_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FakeQuant.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> FakeQuantChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+ assert(operation.has_fakequant_options());
+
+ auto options = operation.fakequant_options();
+
+ tflite::FakeQuantOptionsBuilder fq_options_builder{fbb};
+ fq_options_builder.add_min(options.min());
+ fq_options_builder.add_max(options.max());
+ fq_options_builder.add_num_bits(options.num_bits());
+ fq_options_builder.add_narrow_range(options.narrow_range());
+
+ return fq_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> FakeQuantChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new FakeQuantChef{operation}};
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_FAKE_QUANT_H__
+#define __OP_FAKE_QUANT_H__
+
+#include "OpChef.h"
+
+class FakeQuantChef final : public OpChef
+{
+public:
+ explicit FakeQuantChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_FAKE_QUANT; }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_FakeQuantOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct FakeQuantChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_FAKE_QUANT_H__
{
public:
explicit LocalResponseNormalizationChef(const tflchef::Operation *operation)
- : _operation{operation}
+ : _operation{operation}
{
// DO NOTHING
}
// Note: 'CreateVector' should be placed before 'CreateOptions'
// Read flatbuffers.h 'void NotNested()' for more information
auto fb_squeeze_dims =
- fbb.CreateVector(options.squeeze_dim().data(), options.squeeze_dim().size());
+ fbb.CreateVector(options.squeeze_dim().data(), options.squeeze_dim().size());
return tflite::CreateSqueezeOptions(fbb, fb_squeeze_dims).Union();
}
strided_slice_options_builder.add_begin_mask(operation.strided_slice_options().begin_mask());
strided_slice_options_builder.add_end_mask(operation.strided_slice_options().end_mask());
strided_slice_options_builder.add_ellipsis_mask(
- operation.strided_slice_options().ellipsis_mask());
+ operation.strided_slice_options().ellipsis_mask());
strided_slice_options_builder.add_new_axis_mask(
- operation.strided_slice_options().new_axis_mask());
+ operation.strided_slice_options().new_axis_mask());
strided_slice_options_builder.add_shrink_axis_mask(
- operation.strided_slice_options().shrink_axis_mask());
+ operation.strided_slice_options().shrink_axis_mask());
return strided_slice_options_builder.Finish().Union();
}
tflite::UnidirectionalSequenceLSTMOptionsBuilder options_builder(fbb);
options_builder.add_fused_activation_function(
- as_tflite_activation(operation.unidirectional_sequence_lstm_options().activation()));
+ as_tflite_activation(operation.unidirectional_sequence_lstm_options().activation()));
options_builder.add_cell_clip(operation.unidirectional_sequence_lstm_options().cell_clip());
options_builder.add_proj_clip(operation.unidirectional_sequence_lstm_options().proj_clip());
options_builder.add_time_major(operation.unidirectional_sequence_lstm_options().time_major());
options_builder.add_asymmetric_quantize_inputs(
- operation.unidirectional_sequence_lstm_options().asymmetric_quantize_inputs());
+ operation.unidirectional_sequence_lstm_options().asymmetric_quantize_inputs());
return options_builder.Finish().Union();
}
{
public:
explicit UnidirectionalSequenceLSTMChef(const tflchef::Operation *operation)
- : _operation{operation}
+ : _operation{operation}
{
// DO NOTHING
}
OP_CHEF(AveragePool2D, AveragePool2DChefFactory)
OP_CHEF(BatchMatMul, BatchMatMulChefFactory)
OP_CHEF(BatchToSpaceND, BatchToSpaceNDChefFactory)
+OP_CHEF(BidirectionalSequenceLSTM, BidirectionalSequenceLSTMChefFactory)
OP_CHEF(Cast, CastChefFactory)
OP_CHEF(Ceil, CeilChefFactory)
OP_CHEF(Concatenation, ConcatenationChefFactory)
OP_CHEF(Equal, EqualChefFactory)
OP_CHEF(Exp, ExpChefFactory)
OP_CHEF(ExpandDims, ExpandDimsChefFactory)
+OP_CHEF(FakeQuant, FakeQuantChefFactory)
OP_CHEF(Fill, FillChefFactory)
OP_CHEF(Floor, FloorChefFactory)
OP_CHEF(FloorDiv, FloorDivChefFactory)
OP_CHEF(AddV2, AddV2ChefFactory)
OP_CHEF(All, AllChefFactory)
OP_CHEF(BatchMatMulV2, BatchMatMulV2ChefFactory)
+OP_CHEF(BroadcastTo, BroadcastToChefFactory)
OP_CHEF(MatMul, MatMulChefFactory)
OP_CHEF(MatrixBandPart, MatrixBandPartChefFactory)
OP_CHEF(MaxPoolWithArgMax, MaxPoolWithArgMaxChefFactory)
#include "Op/AveragePool2D.h"
#include "Op/BatchMatMul.h"
#include "Op/BatchToSpaceND.h"
+#include "Op/BidirectionalSequenceLSTM.h"
#include "Op/Cast.h"
#include "Op/Ceil.h"
#include "Op/Concatenation.h"
#include "Op/Equal.h"
#include "Op/Exp.h"
#include "Op/ExpandDims.h"
+#include "Op/FakeQuant.h"
#include "Op/Fill.h"
#include "Op/Floor.h"
#include "Op/FloorDiv.h"
#include "CustomOp/AddV2.h"
#include "CustomOp/All.h"
#include "CustomOp/BatchMatMulV2.h"
+#include "CustomOp/BroadcastTo.h"
#include "CustomOp/MatMul.h"
#include "CustomOp/MatrixBandPart.h"
#include "CustomOp/MaxPoolWithArgMax.h"
UINT8 = 3;
INT64 = 4;
BOOL = 6;
+ INT16 = 7;
}
enum DimensionType {
SYMMETRIC = 1;
}
+message BidirectionalSequenceLSTMOptions {
+ optional Activation activation = 1 [default = NONE];
+ optional float cell_clip = 2 [default = 0.0];
+ optional float proj_clip = 3 [default = 0.0];
+ optional bool merge_outputs = 6 [default = false];
+ optional bool time_major = 4 [default = true];
+ optional bool asymmetric_quantize_inputs = 5 [default = false];
+}
+
message Conv2DOptions
{
optional Padding padding = 1 [default = VALID];
optional bool include_batch_in_index = 7 [default = false];
}
+message FakeQuantOptions {
+ optional float min = 1 [default = 0.0];
+ optional float max = 2 [default = 0.0];
+ optional int32 num_bits = 3 [default = 0];
+ optional bool narrow_range = 4 [default = false];
+}
+
message Operation {
optional string type = 1;
repeated string input = 2;
optional SparseToDenseOptions sparse_to_dense_options = 175;
optional PowOptions pow_options = 176;
optional ArgMinOptions argmin_options = 177;
- // FakeQuantOptions 178
- // BidirectionalSequenceLSTMOptions 179
+ optional FakeQuantOptions fakequant_options = 178;
+ optional BidirectionalSequenceLSTMOptions bidirectional_sequence_lstm_options = 179;
// BidirectionalSequenceRNNOptions 180
optional UnidirectionalSequenceLSTMOptions unidirectional_sequence_lstm_options = 181;
optional RangeOptions range_options = 182;
--- /dev/null
+operand {
+ name: "ifm"
+ type: INT16
+ shape { dim: 1 dim: 5 dim: 5 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: INT16
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "1.0"
+ arg: "6.0"
+ }
+}
+operand {
+ name: "bias"
+ type: INT16
+ shape { dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "12345"
+ }
+}
+operand {
+ name: "ofm"
+ type: INT16
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
target_include_directories(tflchef_tflite PRIVATE src)
target_link_libraries(tflchef_tflite tflchef_proto)
target_link_libraries(tflchef_tflite mio_tflite)
-target_link_libraries(tflchef_tflite stdex)
target_link_libraries(tflchef_tflite cwrap)
target_link_libraries(tflchef_tflite souschef)
return tflchef::UINT8;
case tflite::TensorType_BOOL:
return tflchef::BOOL;
+ case tflite::TensorType_INT16:
+ return tflchef::INT16;
// TODO handle other types
// TensorType_FLOAT16
// TensorType_STRING
- // TensorType_INT16
// TensorType_COMPLEX64
default:
throw std::runtime_error{"unsupported tensor type"};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BidirectionalSequenceLSTM.h"
+
+#include "Convert.h"
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpBidirectionalSequenceLSTM::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 48);
+
+ for (int32_t i = 0; i < inputs.size(); i++)
+ {
+ // Skip inputs 0, 35, 36, 37 and 38.
+ // These are the Input Tensor, the ActivationState Tensors (forward and backward), and the
+ // CellState Tensors (forward and backward).
+ // They may be updated from the previous step or from user-given data, so they cannot be Const.
+ if (i == 0 || i == 35 || i == 36 || i == 37 || i == 38)
+ continue;
+ if (inputs[i] != -1)
+ fill_tensor_to_import(inputs[i], import);
+ }
+}
+
+tflchef::Operation *
+TFliteOpBidirectionalSequenceLSTM::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_BidirectionalSequenceLSTMOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("BidirectionalSequenceLSTM");
+
+ auto op_options = operation->mutable_bidirectional_sequence_lstm_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+ op_options->set_cell_clip(op_params->cell_clip());
+ op_options->set_proj_clip(op_params->proj_clip());
+ op_options->set_time_major(op_params->time_major());
+ op_options->set_asymmetric_quantize_inputs(op_params->asymmetric_quantize_inputs());
+ op_options->set_merge_outputs(op_params->merge_outputs());
+
+ return operation;
+}
+
+} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+#define __TFLITE_OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for BidirectionalSequenceLSTM
+ */
+class TFliteOpBidirectionalSequenceLSTM : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_BIDIRECTIONALSEQUENCE_LSTM_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FakeQuant.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpFakeQuant::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpFakeQuant::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_FakeQuantOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("FakeQuant");
+
+ auto op_options = operation->mutable_fakequant_options();
+
+ op_options->set_min(op_params->min());
+ op_options->set_max(op_params->max());
+ op_options->set_num_bits(op_params->num_bits());
+ op_options->set_narrow_range(op_params->narrow_range());
+
+ return operation;
+}
+
+} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_FAKE_QUANT_H__
+#define __TFLITE_OP_FAKE_QUANT_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for FakeQuant
+ */
+class TFliteOpFakeQuant : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_FAKE_QUANT_H__
#include "Maximum.h"
+#include "Convert.h"
+#include "FillerHelper.h"
+
namespace tflchef
{
void TFliteOpMaximum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- // Nothing to do with filler
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 2);
+
+ fill_tensor_to_import(inputs[0], import);
+ fill_tensor_to_import(inputs[1], import);
}
tflchef::Operation *TFliteOpMaximum::build(const tflite::Operator *op, TFliteImport *import,
#include "Minimum.h"
#include "Convert.h"
+#include "FillerHelper.h"
namespace tflchef
{
void TFliteOpMinimum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- // Nothing to do with filler
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 2);
+
+ fill_tensor_to_import(inputs[0], import);
+ fill_tensor_to_import(inputs[1], import);
}
tflchef::Operation *TFliteOpMinimum::build(const tflite::Operator *op, TFliteImport *import,
for (int32_t i = 0; i < inputs.size(); i++)
{
- // Except for Input 0, 17 and 18.
- // Each Input mean Input[0](=Input Tensor), Input[17](=OutputState Tensor) and
- // Input[18](=CellState Tensor).
+ // Skip inputs 0, 18 and 19.
+ // These are Input[0](=Input Tensor), Input[18](=OutputState Tensor) and
+ // Input[19](=CellState Tensor).
// This could be updated from previous input or User Given data, so This could not be Const
- if (i == 0 || i == 17 || i == 18)
+ if (i == 0 || i == 18 || i == 19)
continue;
if (inputs[i] != -1)
fill_tensor_to_import(inputs[i], import);
#include "Op/AveragePool2D.h"
#include "Op/BatchMatMul.h"
#include "Op/BatchToSpaceND.h"
+#include "Op/BidirectionalSequenceLSTM.h"
#include "Op/Cast.h"
#include "Op/Ceil.h"
#include "Op/Concatenation.h"
#include "Op/Equal.h"
#include "Op/Exp.h"
#include "Op/ExpandDims.h"
+#include "Op/FakeQuant.h"
#include "Op/Fill.h"
#include "Op/Floor.h"
#include "Op/FloorDiv.h"
REG_TFL_OP(AVERAGE_POOL_2D, TFliteOpAveragePool2D);
REG_TFL_OP(BATCH_MATMUL, TFliteOpBatchMatMul);
REG_TFL_OP(BATCH_TO_SPACE_ND, TFliteOpBatchToSpaceND);
+ REG_TFL_OP(BIDIRECTIONAL_SEQUENCE_LSTM, TFliteOpBidirectionalSequenceLSTM);
REG_TFL_OP(CAST, TFliteOpCast);
REG_TFL_OP(CEIL, TFliteOpCeil);
REG_TFL_OP(CONCATENATION, TFliteOpConcatenation);
REG_TFL_OP(EQUAL, TFliteOpEqual);
REG_TFL_OP(EXP, TFliteOpExp);
REG_TFL_OP(EXPAND_DIMS, TFliteOpExpandDims);
+ REG_TFL_OP(FAKE_QUANT, TFliteOpFakeQuant);
REG_TFL_OP(FILL, TFliteOpFill);
REG_TFL_OP(FLOOR, TFliteOpFloor);
REG_TFL_OP(FLOOR_DIV, TFliteOpFloorDiv);
add_executable(tflchef Driver.cpp)
target_link_libraries(tflchef tflchef_core)
target_link_libraries(tflchef safemain)
+
+install(TARGETS tflchef DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(tflchef_test Driver.test.cpp Driver.cpp)
+target_link_libraries(tflchef_test tflchef_core)
#include <iostream>
-int entry(int argc, char **argv)
+int entry_stream(std::istream &is)
{
int32_t model_version = 1;
// Read a model recipe from standard input
{
- google::protobuf::io::IstreamInputStream iis{&std::cin};
+ google::protobuf::io::IstreamInputStream iis{&is};
if (!google::protobuf::TextFormat::Parse(&iis, &model_recipe))
{
std::cerr << "ERROR: Failed to parse recipe" << std::endl;
return 0;
}
+
+int entry(int, char **)
+{
+ // forward to entry_stream
+ return entry_stream(std::cin);
+}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+// entry function to test from Driver.cpp
+int entry_stream(std::istream &is);
+
+TEST(TFlChefDriverTest, entry_empty_NEG)
+{
+ std::istringstream empty_input("");
+
+ ASSERT_EQ(0, entry_stream(empty_input));
+}
+
+TEST(TFlChefDriverTest, entry_invalid_NEG)
+{
+ std::istringstream invalid_input("invalid: input");
+
+ ASSERT_NE(0, entry_stream(invalid_input));
+}
+
+TEST(TFlChefDriverTest, entry_invalid_version_NEG)
+{
+ std::istringstream invalid_version_input("version: 9999");
+
+ ASSERT_NE(0, entry_stream(invalid_version_input));
+}
target_link_libraries(tflchef-file arser)
target_link_libraries(tflchef-file tflchef_core)
target_link_libraries(tflchef-file safemain)
+
+install(TARGETS tflchef-file DESTINATION bin)
{
arser::Arser arser;
arser.add_argument("recipe")
- .type(arser::DataType::STR)
- .help("Source recipe file path to convert");
+ .type(arser::DataType::STR)
+ .help("Source recipe file path to convert");
arser.add_argument("tflite").type(arser::DataType::STR).help("Target tflite file path");
try
if (model_version > 1)
{
- std::cerr << "ERROR: Unsupported recipe version: " << model_version << ", '" << argv[1] << "'"
- << std::endl;
+ std::cerr << "ERROR: Unsupported recipe version: " << model_version << ", '" << recipe_path
+ << "'" << std::endl;
return 255;
}
target_link_libraries(tflchef-reverse tflchef_tflite)
target_link_libraries(tflchef-reverse safemain)
target_link_libraries(tflchef-reverse foder)
+
+install(TARGETS tflchef-reverse DESTINATION bin)
{
arser::Arser arser;
arser.add_argument("tflite")
- .type(arser::DataType::STR)
- .help("Source tflite file path to convert");
+ .type(arser::DataType::STR)
+ .help("Source tflite file path to convert");
arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path");
try
### Dependency
- safemain
-- stdex
- FlatBuffers
}
};
+class BidirectionalSequenceLSTMPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_BidirectionalSequenceLSTMOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "cell_clip(" << params->cell_clip() << ") ";
+ os << "proj_clip(" << params->proj_clip() << ") ";
+ os << "time_major(" << params->time_major() << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << "merge_outputs(" << params->merge_outputs() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class CastPrinter : public OpPrinter
{
public:
os << "Stride.H(" << conv_params->stride_h() << ") ";
os << "DepthMultiplier(" << conv_params->depth_multiplier() << ") ";
os << "Dilation.W(" << conv_params->dilation_w_factor() << ") ";
- os << "Dilation.H(" << conv_params->dilation_h_factor() << ")";
+ os << "Dilation.H(" << conv_params->dilation_h_factor() << ") ";
os << "Activation("
<< EnumNameActivationFunctionType(conv_params->fused_activation_function()) << ") ";
os << std::endl;
}
};
+class FakeQuantPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_FakeQuantOptions())
+ {
+ os << " ";
+ os << "Min(" << params->min() << ") ";
+ os << "Max(" << params->max() << ") ";
+ os << "NumBits(" << params->num_bits() << ") ";
+ os << std::boolalpha;
+ os << "NarrowRange(" << params->narrow_range() << ") ";
+ os << std::noboolalpha;
+ os << std::endl;
+ }
+ }
+};
+
class FullyConnectedPrinter : public OpPrinter
{
public:
_op_map[tflite::BuiltinOperator_ARG_MAX] = make_unique<ArgMaxPrinter>();
_op_map[tflite::BuiltinOperator_ARG_MIN] = make_unique<ArgMinPrinter>();
_op_map[tflite::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<Pool2DPrinter>();
+ _op_map[tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM] =
+ make_unique<BidirectionalSequenceLSTMPrinter>();
_op_map[tflite::BuiltinOperator_CAST] = make_unique<CastPrinter>();
// There is no Option for CEIL
_op_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
_op_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
// There is no Option for DEQUANTIZE
_op_map[tflite::BuiltinOperator_DIV] = make_unique<DivPrinter>();
+ _op_map[tflite::BuiltinOperator_FAKE_QUANT] = make_unique<FakeQuantPrinter>();
// There is no Option for FLOOR
// There is no Option for FLOOR_MOD
_op_map[tflite::BuiltinOperator_FULLY_CONNECTED] = make_unique<FullyConnectedPrinter>();
_op_map[tflite::BuiltinOperator_L2_NORMALIZATION] = make_unique<L2NormPrinter>();
_op_map[tflite::BuiltinOperator_LEAKY_RELU] = make_unique<LeakyReluPrinter>();
_op_map[tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION] =
- make_unique<LocalResponseNormalizationPrinter>();
+ make_unique<LocalResponseNormalizationPrinter>();
// There is no Option for LOG
// There is no Option for LOGISTIC
// There is no Option for LOG_SOFTMAX
_op_map[tflite::BuiltinOperator_RESHAPE] = make_unique<ReshapePrinter>();
_op_map[tflite::BuiltinOperator_RESIZE_BILINEAR] = make_unique<ResizeBilinearPrinter>();
_op_map[tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR] =
- make_unique<ResizeNearestNeighborPrinter>();
+ make_unique<ResizeNearestNeighborPrinter>();
_op_map[tflite::BuiltinOperator_REVERSE_SEQUENCE] = make_unique<ReverseSequencePrinter>();
// There is no Option for ROUND
// There is no Option for SELECT
_op_map[tflite::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
// There is no Option for TOPK_V2
_op_map[tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] =
- make_unique<UnidirectionalSequenceLSTMPrinter>();
+ make_unique<UnidirectionalSequenceLSTMPrinter>();
_op_map[tflite::BuiltinOperator_UNIQUE] = make_unique<UniquePrinter>();
_op_map[tflite::BuiltinOperator_WHILE] = make_unique<WhilePrinter>();
_op_map[tflite::BuiltinOperator_CUSTOM] = make_unique<CustomOpPrinter>();
target_link_libraries(tflite2circle mio_tflite)
target_link_libraries(tflite2circle mio_circle)
target_link_libraries(tflite2circle vconone)
+target_link_libraries(tflite2circle nncc_coverage)
install(TARGETS tflite2circle DESTINATION bin)
arser::Arser arser{"tflite2circle is a Tensorflow lite to circle model converter"};
arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
arser.add_argument("tflite")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Source tflite file path to convert");
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Source tflite file path to convert");
arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Target circle file path");
try
#include "BuildBuiltinOptions/ArgMinOptions.h"
#include "BuildBuiltinOptions/BatchMatMulOptions.h"
#include "BuildBuiltinOptions/BatchToSpaceNDOptions.h"
+#include "BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.h"
#include "BuildBuiltinOptions/CastOptions.h"
#include "BuildBuiltinOptions/ConcatenationOptions.h"
#include "BuildBuiltinOptions/Conv2DOptions.h"
#include "BuildBuiltinOptions/EqualOptions.h"
#include "BuildBuiltinOptions/ExpandDimsOptions.h"
#include "BuildBuiltinOptions/ExpOptions.h"
+#include "BuildBuiltinOptions/FakeQuantOptions.h"
#include "BuildBuiltinOptions/FillOptions.h"
#include "BuildBuiltinOptions/FloorDivOptions.h"
#include "BuildBuiltinOptions/FloorModOptions.h"
assert(tflite_builtin_options);
circle::AddOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::ArgMaxOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_output_type(
- get_circle_tensortype(tflite_builtin_options->output_type()));
+ get_circle_tensortype(tflite_builtin_options->output_type()));
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::ArgMinOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_output_type(
- get_circle_tensortype(tflite_builtin_options->output_type()));
+ get_circle_tensortype(tflite_builtin_options->output_type()));
return builtin_options_builder.Finish();
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BidirectionalSequenceLSTMOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::BidirectionalSequenceLSTMOptions>
+build_circle_BidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_BidirectionalSequenceLSTMOptions();
+ circle::BidirectionalSequenceLSTMOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ builtin_options_builder.add_cell_clip(tflite_builtin_options->cell_clip());
+ builtin_options_builder.add_proj_clip(tflite_builtin_options->proj_clip());
+ builtin_options_builder.add_time_major(tflite_builtin_options->time_major());
+ builtin_options_builder.add_merge_outputs(tflite_builtin_options->merge_outputs());
+ builtin_options_builder.add_asymmetric_quantize_inputs(
+ tflite_builtin_options->asymmetric_quantize_inputs());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_BIDIRECTIONALSEQUENCE_LSTM_OPTIONS_H__
+#define __BBO_BIDIRECTIONALSEQUENCE_LSTM_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::BidirectionalSequenceLSTMOptions>
+build_circle_BidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_BIDIRECTIONALSEQUENCE_LSTM_OPTIONS_H__
circle::CastOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_in_data_type(
- get_circle_tensortype(tflite_builtin_options->in_data_type()));
+ get_circle_tensortype(tflite_builtin_options->in_data_type()));
builtin_options_builder.add_out_data_type(
- get_circle_tensortype(tflite_builtin_options->out_data_type()));
+ get_circle_tensortype(tflite_builtin_options->out_data_type()));
return builtin_options_builder.Finish();
}
circle::ConcatenationOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_axis(tflite_builtin_options->axis());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
builtin_options_builder.add_stride_w(tflite_builtin_options->stride_w());
builtin_options_builder.add_stride_h(tflite_builtin_options->stride_h());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
builtin_options_builder.add_dilation_w_factor(tflite_builtin_options->dilation_w_factor());
builtin_options_builder.add_dilation_h_factor(tflite_builtin_options->dilation_h_factor());
return builtin_options_builder.Finish();
builtin_options_builder.add_stride_h(tflite_builtin_options->stride_h());
builtin_options_builder.add_depth_multiplier(tflite_builtin_options->depth_multiplier());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
builtin_options_builder.add_dilation_w_factor(tflite_builtin_options->dilation_w_factor());
builtin_options_builder.add_dilation_h_factor(tflite_builtin_options->dilation_h_factor());
return builtin_options_builder.Finish();
assert(tflite_builtin_options);
circle::DivOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FillOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::FakeQuantOptions>
+build_circle_FakeQuantOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_FakeQuantOptions();
+ assert(tflite_builtin_options);
+ circle::FakeQuantOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_min(tflite_builtin_options->min());
+ builtin_options_builder.add_max(tflite_builtin_options->max());
+ builtin_options_builder.add_num_bits(tflite_builtin_options->num_bits());
+ builtin_options_builder.add_narrow_range(tflite_builtin_options->narrow_range());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_FAKEQUANT_OPTIONS_H__
+#define __BBO_FAKEQUANT_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::FakeQuantOptions>
+build_circle_FakeQuantOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_FAKEQUANT_OPTIONS_H__
assert(tflite_builtin_options);
circle::FullyConnectedOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
// Get FullyConnectedOptionsWeightsFormat
auto tflite_weight_format = tflite_builtin_options->weights_format();
if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_DEFAULT)
builtin_options_builder.add_weights_format(circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
else if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8)
builtin_options_builder.add_weights_format(
- circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8);
+ circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8);
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::L2NormOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::MulOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
builtin_options_builder.add_filter_width(tflite_builtin_options->filter_width());
builtin_options_builder.add_filter_height(tflite_builtin_options->filter_height());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::SubOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
auto tflite_builtin_options = op->builtin_options_as_UnidirectionalSequenceLSTMOptions();
circle::UnidirectionalSequenceLSTMOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
builtin_options_builder.add_cell_clip(tflite_builtin_options->cell_clip());
builtin_options_builder.add_proj_clip(tflite_builtin_options->proj_clip());
builtin_options_builder.add_time_major(tflite_builtin_options->time_major());
builtin_options_builder.add_asymmetric_quantize_inputs(
- tflite_builtin_options->asymmetric_quantize_inputs());
+ tflite_builtin_options->asymmetric_quantize_inputs());
return builtin_options_builder.Finish();
}
assert(tflite_builtin_options);
circle::UniqueOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_idx_out_type(
- get_circle_tensortype(tflite_builtin_options->idx_out_type()));
+ get_circle_tensortype(tflite_builtin_options->idx_out_type()));
return builtin_options_builder.Finish();
}
flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order;
flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map;
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
- dim_metadata;
+ dim_metadata;
// traversal_order
if (it->sparsity()->traversal_order())
{
auto traversal_order_vec = std::vector<int32_t>{
- it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
+ it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
traversal_order = fb->CreateVector(traversal_order_vec);
}
// array_segments
auto tflite_array_segments_type = it->array_segments_type();
auto circle_array_segments =
- get_circle_sparse_index_vector(*fb, it->array_segments(), tflite_array_segments_type);
+ get_circle_sparse_index_vector(*fb, it->array_segments(), tflite_array_segments_type);
auto circle_array_segments_type =
- get_circle_sparse_index_vector_type(tflite_array_segments_type);
+ get_circle_sparse_index_vector_type(tflite_array_segments_type);
// array_indices
auto tflite_array_indices_type = it->array_indices_type();
auto circle_array_indices =
- get_circle_sparse_index_vector(*fb, it->array_indices(), tflite_array_indices_type);
+ get_circle_sparse_index_vector(*fb, it->array_indices(), tflite_array_indices_type);
auto circle_array_indices_type =
- get_circle_sparse_index_vector_type(tflite_array_indices_type);
+ get_circle_sparse_index_vector_type(tflite_array_indices_type);
auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb};
if (it->shape_signature())
{
auto shape_signature_vec =
- std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
+ std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
shape_signature = fb->CreateVector(shape_signature_vec);
}
}
CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model)
- : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
+ : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
{
const tflite::Model *tfl_model = model.load_model();
// verify flatbuffers
}
_operator_codes_offset =
- std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
+ std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
_subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs());
_buffers_offset = std::make_unique<Offset<BufferLink>>(fb, tfl_model->buffers());
_metadata_buffer_offset =
- std::make_unique<Offset<MetaDataBufferLink>>(fb, tfl_model->metadata_buffer());
+ std::make_unique<Offset<MetaDataBufferLink>>(fb, tfl_model->metadata_buffer());
model_build();
}
{
const tflite::Int32Vector *i32_array = static_cast<const tflite::Int32Vector *>(v_array);
auto values_vec_int32 =
- std::vector<int32_t>{i32_array->values()->begin(), i32_array->values()->end()};
+ std::vector<int32_t>{i32_array->values()->begin(), i32_array->values()->end()};
auto values_int32 = fb.CreateVector(values_vec_int32);
circle::Int32VectorBuilder int32_vector_builder{fb};
int32_vector_builder.add_values(values_int32);
{
const tflite::Uint16Vector *u16_array = static_cast<const tflite::Uint16Vector *>(v_array);
auto values_vec_uint16 =
- std::vector<uint16_t>{u16_array->values()->begin(), u16_array->values()->end()};
+ std::vector<uint16_t>{u16_array->values()->begin(), u16_array->values()->end()};
auto values_uint16 = fb.CreateVector(values_vec_uint16);
circle::Uint16VectorBuilder uint16_vector_builder{fb};
uint16_vector_builder.add_values(values_uint16);
{
const tflite::Uint8Vector *u8_array = static_cast<const tflite::Uint8Vector *>(v_array);
auto values_vec_uint8 =
- std::vector<uint8_t>{u8_array->values()->begin(), u8_array->values()->end()};
+ std::vector<uint8_t>{u8_array->values()->begin(), u8_array->values()->end()};
auto values_uint8 = fb.CreateVector(values_vec_uint8);
circle::Uint8VectorBuilder uint8_vector_builder{fb};
uint8_vector_builder.add_values(values_uint8);
* @brief Returns circle builtin_code according to tflite.
*
* @note You can see a list of currently supported BuiltinOperator in TFLOperator.lst file.
-*/
+ */
circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop);
/**
* @brief Returns circle TensorType according to tflite.
*
* @note You can see a list of currently supported TensorType in TFLTensorType.lst file.
-*/
+ */
circle::TensorType get_circle_tensortype(tflite::TensorType tfl_tt);
/**
* @brief Returns circle Padding enum according to tflite.
-*/
+ */
circle::Padding get_circle_padding(tflite::Padding tfl_p);
/**
*
* @note You can see a list of currently supported ActivationFunctionType in
* TFLActivationFunctionType.lst file.
-*/
+ */
circle::ActivationFunctionType
get_circle_activation_function_type(tflite::ActivationFunctionType tfl_aft);
* This function calls the build_circle_##BuiltinOptions internally (e.g.
* build_circle_AbsOptions, build_circle_AddOptions, etc.), so refer to it for a more
* detailed implementation.
-*/
+ */
flatbuffers::Offset<void> get_circle_builtin_options(flatbuffers::FlatBufferBuilder &fb,
const tflite::Operator *op);
* @brief Returns circle builtin_options_type according to tflite.
*
* @note You can see a list of currently supported BuiltinOptions in TFLBuiltinOptions.lst file.
-*/
+ */
circle::BuiltinOptions get_circle_builtin_options_type(const tflite::Operator *op);
/**
* @brief Returns circle MirrorPadMode according to tflite.
-*/
+ */
circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode);
/**
* @brief Returns circle DimensionType according to tflite.
-*/
+ */
circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type);
/**
* @brief Returns circle SparseIndexVector according to tflite.
-*/
+ */
flatbuffers::Offset<void>
get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb, const void *values,
const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
/**
* @brief Returns circle SparseIndexVector type according to tflite.
-*/
+ */
circle::SparseIndexVector
get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
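Taken together, the helpers declared above map each tflite enum or option payload onto its circle counterpart. A minimal sketch of how they chain (illustrative only, not the repository's actual conversion code; it assumes the flatbuffers-generated circle::OperatorBuilder is in scope and skips opcode index, inputs and outputs):

// Hypothetical sketch: rebuild the option payload of a single operator.
flatbuffers::Offset<circle::Operator> convert_options_sketch(flatbuffers::FlatBufferBuilder &fb,
                                                             const tflite::Operator *op)
{
  // The payload and its discriminating union type come from separate helpers
  // and must describe the same BuiltinOptions member.
  auto circle_options = get_circle_builtin_options(fb, op);
  auto circle_options_type = get_circle_builtin_options_type(op);

  circle::OperatorBuilder operator_builder{fb};
  operator_builder.add_builtin_options(circle_options);
  operator_builder.add_builtin_options_type(circle_options_type);
  return operator_builder.Finish();
}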
TFL_BUILTIN_OPTIONS(ShapeOptions)
TFL_BUILTIN_OPTIONS(PowOptions)
TFL_BUILTIN_OPTIONS(ArgMinOptions)
-//TFL_BUILTIN_OPTIONS(FakeQuantOptions)
+TFL_BUILTIN_OPTIONS(FakeQuantOptions)
TFL_BUILTIN_OPTIONS(PackOptions)
TFL_BUILTIN_OPTIONS(LogicalOrOptions)
TFL_BUILTIN_OPTIONS(OneHotOptions)
TFL_BUILTIN_OPTIONS(SquareOptions)
TFL_BUILTIN_OPTIONS(ZerosLikeOptions)
TFL_BUILTIN_OPTIONS(FillOptions)
-//TFL_BUILTIN_OPTIONS(BidirectionalSequenceLSTMOptions)
+TFL_BUILTIN_OPTIONS(BidirectionalSequenceLSTMOptions)
//TFL_BUILTIN_OPTIONS(BidirectionalSequenceRNNOptions)
TFL_BUILTIN_OPTIONS(UnidirectionalSequenceLSTMOptions)
TFL_BUILTIN_OPTIONS(FloorModOptions)
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x00000000000c0001)
+ set(VCONONE_VERSION 0x00000000000f0001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
std::string get_copyright(void)
{
std::string str;
- str = "Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
+ str = "Copyright (c) 2020-2021 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
str += "Licensed under the Apache License, Version 2.0\r\n";
str += "https://github.com/Samsung/ONE";
return str;
+++ /dev/null
-../.clang-format.8
\ No newline at end of file
_program_source_map; /**< Contains sources for all programs.
Used for compile-time kernel inclusion. >*/
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */
#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNELEX_H
#define ARM_COMPUTE_CLARGMINMAXLAYERKERNELEX_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/TypesEx.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLCASTBOOLKERNEL_H__
#define __ARM_COMPUTE_CLCASTBOOLKERNEL_H__
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
+#include "src/core/CL/ICLSimple3DKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
#define __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CLGEMMMatrixAccumulateBiasesKernel_H
+#define ARM_COMPUTE_CLGEMMMatrixAccumulateBiasesKernel_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface to add a bias to each row of the input tensor
+ *
+ */
+class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMMatrixAccumulateBiasesKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixAccumulateBiasesKernel &
+ operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types
+ * supported: Same as @p input
+ */
+ void configure(ICLTensor *accum, const ICLTensor *biases);
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data
+ * types supported: Same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *accum,
+ const ICLTensor *biases);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLGEMMMatrixAccumulateBiasesKernel
+ *
+ * @param[in] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types
+ * supported: Same as @p input
+ * @param[in] gpu_target GPU target
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_accum;
+ const ICLTensor *_biases;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMMatrixAccumulateBiasesKernel_H */
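A hedged usage sketch of the kernel above, assuming the usual ACL setup (an initialised CLScheduler plus an F32 accumulator and a 1D bias tensor that are already configured and allocated); the helper below is illustrative and not part of the patch:

#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" // added by this patch
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

// Hypothetical sketch: append shared biases to an accumulator on the GPU.
void accumulate_biases_cl_sketch(arm_compute::CLTensor &accum, arm_compute::CLTensor &biases)
{
  // validate() inspects the tensor infos before any OpenCL state is created.
  auto status = arm_compute::CLGEMMMatrixAccumulateBiasesKernel::validate(
    accum.info(), biases.info(), arm_compute::CLScheduler::get().target());
  if (bool(status))
  {
    arm_compute::CLGEMMMatrixAccumulateBiasesKernel bias_kernel;
    bias_kernel.configure(&accum, &biases);
    arm_compute::CLScheduler::get().enqueue(bias_kernel);
    arm_compute::CLScheduler::get().sync(); // keep the local kernel alive until it finishes
  }
}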
#ifndef __ARM_COMPUTE_CLGATHEREXKERNEL_H__
#define __ARM_COMPUTE_CLGATHEREXKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
#define __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
namespace arm_compute
#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H
+#define ARM_COMPUTE_CLMEMSETKERNEL_H
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for filling the planes of a tensor */
+class CLMemsetKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLMemsetKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMemsetKernel(const CLMemsetKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMemsetKernel &operator=(const CLMemsetKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMemsetKernel(CLMemsetKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMemsetKernel &operator=(CLMemsetKernel &&) = default;
+ /** Default destructor */
+ ~CLMemsetKernel() = default;
+
+ /** Initialise the kernel's tensor and filling value
+ *
+ * @param[in,out] tensor Input tensor to fill. Supported data types: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default
+ * is nullptr.
+ */
+ void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
+ /** Initialise the kernel's tensor and filling value
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] tensor Input tensor to fill. Supported data types: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default
+ * is nullptr.
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *tensor,
+ const PixelValue &constant_value, Window *window = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLMemsetKernel
+ *
+ * @param[in] tensor Source tensor info. Data types supported: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default is
+ * nullptr.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value,
+ Window *window = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_tensor;
+ Window _full_window;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLMEMSETKERNEL_H */
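A minimal sketch of the memset API above (illustrative, not part of the patch; the tensor is assumed to be configured and allocated, and the CLScheduler initialised). Passing no window fills the whole tensor, and a default-constructed PixelValue is zero:

#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" // added by this patch
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

// Hypothetical sketch: zero-fill an entire tensor.
void memset_cl_sketch(arm_compute::CLTensor &tensor)
{
  arm_compute::CLMemsetKernel memset_kernel;
  memset_kernel.configure(&tensor, arm_compute::PixelValue()); // no window: fill the whole tensor
  arm_compute::CLScheduler::get().enqueue(memset_kernel);
  arm_compute::CLScheduler::get().sync();
}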
#ifndef __ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__
#define __ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__
#define __ARM_COMPUTE_CLNEGKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
*/
#ifndef __ARM_COMPUTE_CLONEHOTKERNEL_H__
#define __ARM_COMPUTE_CLONEHOTKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
{
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPADLAYERKERNELEX_H
+#define ARM_COMPUTE_CLPADLAYERKERNELEX_H
+
+#include "src/core/CL/ICLKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the PadLayer function. */
+class CLPadLayerKernelEx : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLPadLayerKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerKernelEx(const CLPadLayerKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerKernelEx &operator=(const CLPadLayerKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPadLayerKernelEx(CLPadLayerKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ CLPadLayerKernelEx &operator=(CLPadLayerKernelEx &&) = default;
+ /** Default destructor */
+ ~CLPadLayerKernelEx() = default;
+ /** Set the input and output tensor.
+ *
+ * @param[in] input Source tensor. Data types supported: U8, S8, QASYMM8,
+ * QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Set the input and output tensor.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The
+ * pair padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLPadLayerKernelEx
+ *
+ * @param[in] input Source tensor info. Data types supported: U8, S8, QASYMM8,
+ * QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
+ * @param[in] output Output tensor info. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ int _input_start_x;
+ int _input_start_y;
+ bool _4d_enabled;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPADLAYERKERNELEX_H */
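The padding argument follows ACL's PaddingList convention: one {front, end} pair per tensor dimension. A small sketch of the validate() entry point (illustrative, not part of the patch; the input and output tensor infos are assumed to exist):

#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h" // added by this patch

// Hypothetical sketch: would this pad configuration be accepted?
arm_compute::Status check_pad_sketch(const arm_compute::ITensorInfo *input,
                                     const arm_compute::ITensorInfo *output)
{
  arm_compute::PaddingList padding{{1, 1}, {2, 2}}; // dim0: 1 front / 1 end, dim1: 2 front / 2 end
  return arm_compute::CLPadLayerKernelEx::validate(input, output, padding); // CONSTANT, zero value
}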
#ifndef __ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__
#define __ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
#define __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
#ifndef __ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__
#define __ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__
#define __ARM_COMPUTE_CLTOPKV2KERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
// these parameters can be changed
#define _ITEMS 16 // number of items in a group
#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
#define __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
-#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
#include "arm_compute/core/TypesEx.h"
+#include "src/core/cpu/kernels/CpuElementwiseKernel.h"
+
namespace arm_compute
{
-class NEBinaryLogicalOperationKernel : public NEElementwiseOperationKernel
+class NEBinaryLogicalOperationKernel : public cpu::kernels::CpuComparisonKernel
{
public:
+ const char *name() const override { return "NEBinaryLogicalOperationKernel"; }
+
+ NEBinaryLogicalOperationKernel() = default;
/** Default destructor */
~NEBinaryLogicalOperationKernel() = default;
// Inherited methods overridden:
static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
const ITensorInfo &output);
+
+ std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output,
+ const Window &window)>
+ _function;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__ */
#ifndef __ARM_COMPUTE_NECASTBOOLKERNEL_H__
#define __ARM_COMPUTE_NECASTBOOLKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__
#define __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H
+#define ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+/** NEON kernel to add a bias to each row of the input tensor */
+class NEGEMMMatrixAccumulateBiasesKernel : public INEKernel
+{
+public:
+ const char *name() const override { return "NEGEMMMatrixAccumulateBiasesKernel"; }
+ /** Default constructor */
+ NEGEMMMatrixAccumulateBiasesKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMMatrixAccumulateBiasesKernel(const NEGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMMatrixAccumulateBiasesKernel &
+ operator=(const NEGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMMatrixAccumulateBiasesKernel(NEGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMMatrixAccumulateBiasesKernel &operator=(NEGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMMatrixAccumulateBiasesKernel() = default;
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in, out] accum The accumulate tensor to convert. Data type supported: F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. Data type
+ * supported: Same as @p input
+ */
+ void configure(ITensor *accum, const ITensor *biases);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEGEMMMatrixAccumulateBiasesKernel
+ *
+ * @param[in] accum The accumulate tensor to convert. Data type supported: F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. Data type
+ * supported: Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *accum, const ITensorInfo *biases);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ ITensor *_accum;
+ const ITensor *_biases;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H */
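The NEON variant mirrors the CL kernel but is dispatched through the CPU scheduler. A hedged sketch, assuming already-configured and allocated F32 tensors (illustrative names, not part of the patch):

#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" // added by this patch
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

// Hypothetical sketch: append shared biases to an accumulator on the CPU.
void accumulate_biases_neon_sketch(arm_compute::Tensor &accum, const arm_compute::Tensor &biases)
{
  arm_compute::NEGEMMMatrixAccumulateBiasesKernel bias_kernel;
  bias_kernel.configure(&accum, &biases);
  // NEON kernels are run through the scheduler, split along one window dimension.
  arm_compute::NEScheduler::get().schedule(&bias_kernel, arm_compute::Window::DimY);
}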
#ifndef __ARM_COMPUTE_NEGATHERKERNELEX_H__
#define __ARM_COMPUTE_NEGATHERKERNELEX_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
#ifndef __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__
#define __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
#ifndef __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__
#define __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__
#define __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
*/
#ifndef __ARM_COMPUTE_NEONEHOTKERNEL_H__
#define __ARM_COMPUTE_NEONEHOTKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
{
#ifndef __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
#define __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
unsigned int kernel_width, unsigned int kernel_height,
const PadStrideInfo &info, unsigned int invalid_right,
unsigned int invalid_top);
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_UTILSEX_H__ */
#include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLNeg.h>
#include <arm_compute/runtime/CL/functions/CLOneHot.h>
+#include <arm_compute/runtime/CL/functions/CLPadLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
#include <arm_compute/runtime/CL/functions/CLSplitVEx.h>
#include <arm_compute/runtime/CL/functions/CLTopKV2.h>
#define __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__
#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h"
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
+
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
std::vector<CLTensor> _results_vector;
CLTensor _not_reshaped_output;
std::vector<CLArgMinMaxLayerKernelEx> _reduction_kernels_vector;
- CLReshapeLayerKernel _reshape_kernel;
+ CLReshapeLayer _reshape_kernel;
unsigned int _num_of_stages;
unsigned int _reduction_axis;
};
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/core/TypesEx.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
namespace arm_compute
{
*/
void configure(ICLTensor *input, ICLTensor *output);
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCASTBOOL_H */
*/
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
-#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
namespace arm_compute
{
bool _is_prepared;
const ICLTensor *_original_weights;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ */
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
namespace arm_compute
{
* transpose_weights is set to true ) (called once)
* -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized
* asymmetric)
- * -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref
- * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is
- * not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. The weights must be 2 dimensional.
* If this function is called after a Convolution Layer, the (transposed)
- * weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed)
- * weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
+ * weights will have as many rows as the product of the first 3 input's dimensions. If it is
+ * called after another FullyConnected Layer, the (transposed) weights will have as many rows as
+ * the input's first dimension. Data type supported: Same as @p input.
* @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
* @param[out] output Destination tensor. Its shape should be equal to the output of a matrix
* multiplication between:
* - The output of im2col on the input and the (transposed) 2D weights, if the
* function is called after a Convolution Layer
* - The input tensor and the (transposed) 2D weights, if the function is
- * called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
+ * called after another FullyConnected Layer. Data type supported: Same as @p input.
* @param[in] fc_info (Optional) Fully connected layer additional info
*/
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLFullyConnectedLayerEx
+ * CLFullyConnectedLayer
*
* @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor info. The weights must be 2 dimensional.
* If this function is called after a Convolution Layer, the (transposed)
- * weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed)
- * weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
+ * weights will have as many rows as the product of the first 3 input's dimensions. If it is
+ * called after another FullyConnected Layer, the (transposed) weights will have as many rows as
+ * the input's first dimension. Data type supported: Same as @p input.
* @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input.
* @param[out] output Destination tensor info. Its shape should be equal to the output of a
* matrix multiplication between:
* - The output of im2col on the input and the (transposed) 2D weights, if the
* function is called after a Convolution Layer
* - The input tensor and the (transposed) 2D weights, if the function is
- * called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
+ * called after another FullyConnected Layer. Data type supported: Same as @p input.
* @param[in] fc_info (Optional) Fully connected layer additional info
*
* @return a status
#ifndef __ARM_COMPUTE_CLGATHEREX_H__
#define __ARM_COMPUTE_CLGATHEREX_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/**
* @brief Class to run @ref CLGatherKernel.
static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
const ITensorInfo *output, int axis = 0);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLGATHEREX_H__ */
void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input,
ICLTensor *output, ICLTensor *hits);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */
#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform an Instance normalization.
*
*/
#ifndef __ARM_COMPUTE_CLONEHOT_H__
#define __ARM_COMPUTE_CLONEHOT_H__
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+
#include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/runtime/IFunction.h"
+
namespace arm_compute
{
class ICLTensor;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPADLAYEREX_H
+#define ARM_COMPUTE_CLPADLAYEREX_H
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+#include "src/core/gpu/cl/kernels/ClCopyKernel.h"
+// #include "arm_compute/runtime/CL/functions/CLCopy.h"
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to pad a tensor. This function calls the following OpenCL functions/kernels:
+ *
+ * -# @ref CLPadLayerKernelEx if there is padding to be added
+ * -# @ref CLCopyKernel otherwise
+ */
+class CLPadLayerEx : public IFunction
+{
+public:
+ /** Default constructor */
+ CLPadLayerEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerEx(const CLPadLayerEx &) = delete;
+ /** Default move constructor */
+ CLPadLayerEx(CLPadLayerEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerEx &operator=(const CLPadLayerEx &) = delete;
+ /** Default move assignment operator */
+ CLPadLayerEx &operator=(CLPadLayerEx &&) = default;
+
+ /** Initialize the function
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Initialize the function
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The
+ * pair padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLPadLayerEx.
+ *
+ * @param[in] input Source tensor info. Data types supported: All.
+ * @param[in] output Output tensor info. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ void configure_reflect_mode(ICLTensor *input, ICLTensor *output);
+
+ std::unique_ptr<CLPadLayerKernelEx> _pad_kernel;
+ std::unique_ptr<opencl::kernels::ClCopyKernel> _copy_kernel;
+ bool _perform_pad;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPADLAYEREX_H */
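At the function level, CLPadLayerEx uses CLPadLayerKernelEx when any padding is requested and falls back to a plain copy otherwise. A usage sketch (illustrative, not part of the patch; the output tensor is assumed to be configured with the padded shape):

#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h" // added by this patch
#include "arm_compute/runtime/CL/CLTensor.h"

// Hypothetical sketch: constant-pad the second dimension by one element on each side.
void pad_cl_sketch(arm_compute::CLTensor &input, arm_compute::CLTensor &output)
{
  arm_compute::CLPadLayerEx pad;
  arm_compute::PaddingList padding{{0, 0}, {1, 1}};
  pad.configure(&input, &output, padding); // CONSTANT mode with a zero PixelValue by default
  pad.run();
}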
std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr};
CLReshapeLayer _reshape;
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */
#include <vector>
#include <memory>
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/runtime/CPP/functions/CPPSplit.h"
+
namespace arm_compute
{
class ICLTensor;
unsigned int _num_splits;
std::vector<CLSlice> _slice_functions;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLSPLITVEX__ */
CLTopKV2Store _store_kernel;
#endif
};
-}
+} // namespace arm_compute
#endif // __ARM_COMPUTE_CLTOPK_V2_H__
#ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
#define __ARM_COMPUTE_NEFUNCTIONSEX_H__
-#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
#include <arm_compute/runtime/NEON/functions/NECastBool.h>
#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
#define __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/TypesEx.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/ITensorInfo.h"
namespace arm_compute
{
#define __ARM_COMPUTE_NECASTBOOL_H__
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**
- * @brief Class to run @ref NECastBoolKernel.
+ * @brief Class to run @ref INESimpleFunctionNoBorder.
*/
-class NECastBool : public INESimpleFunction
+class NECastBool : public INESimpleFunctionNoBorder
{
public:
/** Initialize the function's source, destination
#define __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Error.h"
#include <vector>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**
* @brief Class to perform EmbeddingLookup operation
static Status validate(const ITensorInfo *input, const ITensorInfo *output,
const ITensorInfo *lookups);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ */
#include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
namespace arm_compute
{
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
-#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
namespace arm_compute
{
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayerEx(const NEFullyConnectedLayerEx &) = delete;
/** Default move constructor */
- NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = default;
+ NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayerEx &operator=(const NEFullyConnectedLayerEx &) = delete;
/** Default move assignment operator */
- NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = default;
+ NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = delete;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
MemoryGroup _memory_group;
- NEFlattenLayerKernel _flatten_kernel;
+ NEFlattenLayer _flatten_kernel;
NEConvertFullyConnectedWeights _convert_weights;
NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
NEGEMM _mm_gemm;
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEGatherKernelEx */
class NEGatherEx : public INESimpleFunctionNoBorder
#define __ARM_COMPUTE_NEHASHTABLELOOKUP_H__
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Error.h"
#include <vector>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**
* @brief Class to perform HashtableLookup operation
const ITensorInfo *input, const ITensorInfo *output,
const ITensorInfo *hits);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEHASHTABLELOOKUP_H__ */
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to perform an Instance normalization.
*
Tensor _permuted_input;
Tensor _permuted_output;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__ */
{
// Forward declarations
class ITensor;
+class ITensorInfo;
+
/** Basic function to run @ref NEOneHotKernel */
class NEOneHot : public INESimpleFunctionNoBorder
{
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
/** Prevent instances of this class from being copied (As this class contains pointers) */
NETransposeConvLayer &operator=(const NETransposeConvLayer &) = delete;
/** Allow instances of this class to be moved */
- NETransposeConvLayer(NETransposeConvLayer &&) = default;
+ NETransposeConvLayer(NETransposeConvLayer &&) = delete;
/** Allow instances of this class to be moved */
- NETransposeConvLayer &operator=(NETransposeConvLayer &&) = default;
+ NETransposeConvLayer &operator=(NETransposeConvLayer &&) = delete;
/** Default destructor */
virtual ~NETransposeConvLayer() = default;
PadStrideInfo _info;
bool _is_prepared;
};
-} // arm_compute
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ */
{"gather_ex_1d", "gather_ex.cl"},
{"gather_ex_1d_out", "gather_ex.cl"},
{"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
+ {"gemm_accumulate_biases", "gemm.cl"},
{"hashtable_lookup", "hashtable_lookup.cl"},
{"instance_normalization_ex", "instance_normalization_ex.cl"},
+ {"memset", "memset.cl"},
{"multiply_scale_factor", "multiply_scale_factor.cl"},
{"neg_tensor", "neg_tensor.cl"},
{"one_hot", "one_hot.cl"},
{"one_hot_only_on_value", "one_hot.cl"},
+ {"pad_layer_constant", "pad_layer.cl"},
+ {"pad_layer_symmetric_reflect", "pad_layer.cl"},
{"quantization_symm8", "quantization_symm8.cl"},
{"reduce_min_max", "reduce_operation.cl"},
{"reduce_sum_mean", "reduce_operation.cl"},
const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
#ifdef EMBEDDED_KERNELS
+ {
+ "activation_float_helpers.h",
+#include "./cl_kernels/activation_float_helpers.hembed"
+ },
{
"arg_min_max_ex.cl",
#include "./cl_kernels/arg_min_max_ex.clembed"
+ },
+ {
+ "binary_logical_op.cl",
+#include "./cl_kernels/binary_logical_op.clembed"
},
{
"cast.cl",
{
"gemmlowp_ex.cl",
#include "./cl_kernels/gemmlowp_ex.clembed"
+ },
+ {
+ "gemm_helpers.h",
+#include "./cl_kernels/gemm_helpers.hembed"
},
{
"hashtable_lookup.cl",
#include "./cl_kernels/instance_normalization_ex.clembed"
},
{
- "binary_logical_op.cl",
-#include "./cl_kernels/binary_logical_op.clembed"
+ "gemm.cl",
+#include "./cl_kernels/gemm.clembed"
+ },
+ {
+ "memset.cl",
+#include "./cl_kernels/memset.clembed"
},
{
"multiply_scale_factor.cl",
{
"one_hot.cl",
#include "./cl_kernels/one_hot.clembed"
+ },
+ {
+ "pad_layer.cl",
+#include "./cl_kernels/pad_layer.clembed"
},
{
"quantization_symm8.cl",
{
"reduce_operation.cl",
#include "./cl_kernels/reduce_operation.clembed"
+ },
+ {
+ "repeat.h",
+#include "./cl_kernels/repeat.hembed"
},
{
"scale_factor.cl",
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#if GPU_ARCH == GPU_ARCH_BIFROST
+#define MLA(a, b, c) (fma(c, b, a))
+#else // GPU_ARCH == GPU_ARCH_BIFROST
+#define MLA(a, b, c) ((b) * (c) + (a))
+#endif // GPU_ARCH == GPU_ARCH_BIFROST
+
+// Hard-Swish
+#define hard_swish_op(DATA_TYPE, x, A_VAL, B_VAL) \
+ (x * ((min(max((x + (DATA_TYPE)3.0), (DATA_TYPE)0.0), (DATA_TYPE)6.0)) * (DATA_TYPE)0.166666667))
+
+// Logistic Activation
+#define logistic_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)1.0 / ((DATA_TYPE)1.0 + exp(-x)))
+
+// Hyperbolic Tangent Activation
+#define tanh_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)A_VAL * tanh((DATA_TYPE)B_VAL * x))
+
+// RELU Activation
+#define relu_op(DATA_TYPE, x, A_VAL, B_VAL) (max((DATA_TYPE)0.0, x))
+
+// Bounded RELU Activation
+#define brelu_op(DATA_TYPE, x, A_VAL, B_VAL) (min((DATA_TYPE)A_VAL, max((DATA_TYPE)0.0, x)))
+
+// Lower Upper Bounded RELU Activation
+#define lu_brelu_op(DATA_TYPE, x, A_VAL, B_VAL) (min(max(x, (DATA_TYPE)B_VAL), (DATA_TYPE)A_VAL))
+
+// Leaky RELU Activation
+#define lrelu_op(DATA_TYPE, x, A_VAL, B_VAL) \
+ ((min(x, (DATA_TYPE)0.0) * (DATA_TYPE)A_VAL) + max(x, (DATA_TYPE)0.0))
+
+// Soft RELU Activation
+#define srelu_op(DATA_TYPE, x, A_VAL, B_VAL) (log((DATA_TYPE)1.0 + exp(x)))
+
+// ELU Activation
+#define elu_op(DATA_TYPE, x, A_VAL, B_VAL) \
+ (select(((DATA_TYPE)A_VAL * (exp(x) - (DATA_TYPE)1.0)), x, isgreaterequal(x, (DATA_TYPE)0.0)))
+
+// Absolute Activation
+#define abs_op(DATA_TYPE, x, A_VAL, B_VAL) (fabs(x))
+
+// Square Activation
+#define square_op(DATA_TYPE, x, A_VAL, B_VAL) (x * x)
+
+// Square-root Activation
+#define sqrt_op(DATA_TYPE, x, A_VAL, B_VAL) (sqrt(x))
+
+// Linear Activation
+#define linear_op(DATA_TYPE, x, A_VAL, B_VAL) (MLA((DATA_TYPE)B_VAL, (DATA_TYPE)A_VAL, x))
+
+// Identity Activation
+#define identity_op(DATA_TYPE, x, A_VAL, B_VAL) (x)
+
+#define ACT_OP(op, DATA_TYPE, x, A_VAL, B_VAL) op##_op(DATA_TYPE, x, A_VAL, B_VAL)
+
+#define ACTIVATION(op, DATA_TYPE, x, A_VAL, B_VAL) ACT_OP(op, DATA_TYPE, x, A_VAL, B_VAL)
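+
+// Usage sketch (illustrative): with DATA_TYPE=float, ACTIVATION(relu, float, x, A_VAL, B_VAL)
+// expands via ACT_OP to relu_op(float, x, A_VAL, B_VAL), i.e. (max((float)0.0, x)). The extra
+// ACT_OP level lets the operator name arrive as a macro argument and still be token-pasted with
+// _op; A_VAL/B_VAL are ignored by relu_op but used by e.g. lu_brelu_op as the upper/lower bounds.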
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "gemm_helpers.h"
+#include "repeat.h"
+
+#if defined(M0) && defined(K0) && defined(V0) && defined(DATA_TYPE) && defined(SRC_WIDTH)
+#define INC2 (VEC_DATA_TYPE(uint, 2))(0, 1)
+#define INC3 (VEC_DATA_TYPE(uint, 3))(0, 1, 2)
+#define INC4 (VEC_DATA_TYPE(uint, 4))(0, 1, 2, 3)
+#define INC8 (VEC_DATA_TYPE(uint, 8))(0, 1, 2, 3, 4, 5, 6, 7)
+#define INC16 (VEC_DATA_TYPE(uint, 16))(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
+#define CONCAT_INC(K0) INC##K0
+#define INC(K0) CONCAT_INC(K0)
+
+#if (SRC_WIDTH % K0)
+#define BOUNDARY_CONDITION_X(x, a) \
+ ({ \
+ a = select( \
+ 0, a, \
+ CONVERT(((x * (VEC_DATA_TYPE(uint, K0))K0 + INC(K0)) < (VEC_DATA_TYPE(uint, K0))SRC_WIDTH), \
+ VEC_DATA_TYPE(DATA_TYPE, K0))); \
+ })
+#else // (SRC_WIDTH % K0)
+#define BOUNDARY_CONDITION_X(x, a) ({})
+#endif // (SRC_WIDTH % K0)
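+
+// Worked example (illustrative): with SRC_WIDTH=10 and K0=4, the block at x=2 covers columns
+// x * K0 + INC(4) = {8, 9, 10, 11}; comparing against SRC_WIDTH gives {true, true, false, false},
+// so select() zeroes the two out-of-bounds lanes while the in-bounds lanes keep the loaded values.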
+
+/** This OpenCL kernel reshapes the lhs input matrix. The kernel splits the input matrix in blocks
+ * of size M0xK0 and stores each one (not transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The width of the input tensor must be passed at compile time using -DSRC_WIDTH (e.g.
+ * -DSRC_WIDTH=16)
+ * @note The block's dimensions (M0 and K0) must be passed at compile time using -DM0 and -DK0 (e.g.
+ * -DM0=2, -DK0=2).
+ * @note The number of M0xK0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DV0 (e.g. -DV0=2)
+ * @note Only the following values for M0, K0 and V0 are supported:
+ * M0: 2,3,4,5,6,7,8
+ * K0: 2,3,4,8,16
+ * V0: greater than 0
+ * @note In case the input has to be reinterpreted as a 3D tensor (e.g. input of convolution layer
+ * 1x1), the following information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# HEIGHT_GEMM3D: The height of the input in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the input in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ * @note If the M0xK0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at
+ * compile time.
+ *
+ * @param[in] src_ptr Pointer to the source LHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source LHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source LHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source LHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source LHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_INPUT_AS_3D)
+ */
+__kernel void gemm_reshape_lhs_matrix_nt(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst)
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+)
+{
+ // Block size
+#define BLOCK_SIZE ((M0) * (K0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (K0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (K0) * (V0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (K0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)K0 * sizeof(DATA_TYPE) + y * (uint)M0 * src_stride_y;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)BLOCK_SIZE * (uint)V0 * sizeof(DATA_TYPE)) +
+ ((y / (uint)V0) * (uint)dst_stride_y) +
+ ((y % V0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE));
+
+ // Create variables: uint zin0=0, zin1=0, zin2=0...zin(M0-1)=0;
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zin, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src_stride_z by DEPTH_GEMM3D
+
+ input_ptr += z * (uint)src_stride_z * DEPTH_GEMM3D;
+
+  // The plane (zin) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zin, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, cross_plane_pad, src_stride_y);
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ input_ptr += z * (uint)src_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ output_ptr += z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+ // Load values from the LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
+ BOUNDARY_CONDITION_X(x, a0);
+#if M0 > 1
+ BOUNDARY_CONDITION_X(x, a1);
+#endif // M0 > 1
+#if M0 > 2
+ BOUNDARY_CONDITION_X(x, a2);
+#endif // M0 > 2
+#if M0 > 3
+ BOUNDARY_CONDITION_X(x, a3);
+#endif // M0 > 3
+#if M0 > 4
+ BOUNDARY_CONDITION_X(x, a4);
+#endif // M0 > 4
+#if M0 > 5
+ BOUNDARY_CONDITION_X(x, a5);
+#endif // M0 > 5
+#if M0 > 6
+ BOUNDARY_CONDITION_X(x, a6);
+#endif // M0 > 6
+#if M0 > 7
+ BOUNDARY_CONDITION_X(x, a7);
+#endif // M0 > 7
+ // ---------------------------Store output values ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zout, 0);
+ STORE_BLOCK(M0, K0, DATA_TYPE, a, output_ptr, OUTPUT_STEP_X * sizeof(DATA_TYPE), zout);
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
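+
+// Layout sketch (illustrative): with M0=2, K0=2, V0=1 and no INTERLEAVE, each 2x2 LHS block
+//   | a00 a01 |
+//   | a10 a11 |
+// is written to a single output row as a00 a01 a10 a11 (row stride OUTPUT_STEP_X = K0 elements),
+// so every reshaped block can later be read back with one linear pointer.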
+
+#if M0 == 2
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#elif M0 == 3 // M0 == 3
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i, a2.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#elif M0 == 4 // M0 == 4
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#elif M0 == 5 // M0 == 5
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, 4) \
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, 4))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ DATA_TYPE res1 = a4.s##i; \
+ VSTORE(4) \
+ (res0, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ *((__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE)) + 4) = res1; \
+ })
+#elif M0 == 6 // M0 == 6
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, 4) \
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, 4))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ VEC_DATA_TYPE(DATA_TYPE, 2) \
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, 2))(a4.s##i, a5.s##i); \
+ VSTORE(4) \
+ (res0, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ VSTORE(2) \
+ (res1, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE)) + 4); \
+ })
+#elif M0 == 7 // M0 == 7
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, 4) \
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, 4))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ VEC_DATA_TYPE(DATA_TYPE, 3) \
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, 3))(a4.s##i, a5.s##i, a6.s##i); \
+ VSTORE(4) \
+ (res0, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ VSTORE(3) \
+ (res1, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE)) + 4); \
+ })
+#elif M0 == 8 // M0 == 8
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i, a2.s##i, a3.s##i, a4.s##i, a5.s##i, \
+ a6.s##i, a7.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#else // M0 not supported
+#error "M0 value not supported"
+#endif // M0 conditions
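+
+// Expansion sketch (illustrative, M0=4): TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 2)
+// gathers column 2 of the loaded block into (a0.s2, a1.s2, a2.s2, a3.s2) and VSTOREs it at
+// output_ptr + 0x2 * OUTPUT_STEP_X * sizeof(DATA_TYPE), i.e. one transposed K-column per call.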
+
+/** This OpenCL kernel reshapes the lhs input matrix. The kernel splits the input matrix in blocks
+ * of size M0xK0 and stores each one (transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The width of the input tensor must be passed at compile time using -DSRC_WIDTH (e.g.
+ * -DSRC_WIDTH=16)
+ * @note The block's dimensions (M0 and K0) must be passed at compile time using -DM0 and -DK0 (e.g.
+ * -DM0=2, -DK0=2).
+ * @note The number of M0xK0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DV0 (e.g. -DV0=2)
+ * @note Only the following values for M0, K0 and V0 are supported:
+ * M0: 2,3,4,5,6,7,8
+ * K0: 2,3,4,8,16
+ * V0: greater than 0
+ * @note In case the input has to be reinterpreted as a 3D tensor (e.g. input of convolution layer
+ * 1x1), the following information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# HEIGHT_GEMM3D: The height of the input in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the input in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ * @note If the M0xK0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at
+ * compile time.
+ *
+ * @param[in] src_ptr Pointer to the source LHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source LHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source LHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source LHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source LHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_INPUT_AS_3D)
+ */
+__kernel void gemm_reshape_lhs_matrix_t(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst)
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+)
+{
+ // Block size
+#define BLOCK_SIZE ((M0) * (K0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (M0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (M0) * (V0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (M0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)K0 * sizeof(DATA_TYPE) + y * (uint)M0 * src_stride_y;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)BLOCK_SIZE * (uint)V0 * sizeof(DATA_TYPE)) +
+ ((y / (uint)V0) * (uint)dst_stride_y) +
+ ((y % V0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE));
+
+ // Create variables: uint zin0=0, zin1=0, zin2=0...zin(M0-1)=0;
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zin, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src_stride_z by DEPTH_GEMM3D
+
+ input_ptr += z * (uint)src_stride_z * DEPTH_GEMM3D;
+
+  // The plane (zin) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zin, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, cross_plane_pad, src_stride_y);
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ input_ptr += z * (uint)src_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ output_ptr += z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+
+ // Load values from the LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
+ BOUNDARY_CONDITION_X(x, a0);
+#if M0 > 1
+ BOUNDARY_CONDITION_X(x, a1);
+#endif // M0 > 1
+#if M0 > 2
+ BOUNDARY_CONDITION_X(x, a2);
+#endif // M0 > 2
+#if M0 > 3
+ BOUNDARY_CONDITION_X(x, a3);
+#endif // M0 > 3
+#if M0 > 4
+ BOUNDARY_CONDITION_X(x, a4);
+#endif // M0 > 4
+#if M0 > 5
+ BOUNDARY_CONDITION_X(x, a5);
+#endif // M0 > 5
+#if M0 > 6
+ BOUNDARY_CONDITION_X(x, a6);
+#endif // M0 > 6
+#if M0 > 7
+ BOUNDARY_CONDITION_X(x, a7);
+#endif // M0 > 7
+ // ---------------------------Transpose and store block -----------------------
+
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 0);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 1);
+#if K0 > 2
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 2);
+#endif // K0 > 2
+#if K0 > 3
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 3);
+#endif // K0 > 3
+#if K0 > 4
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 4);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 5);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 6);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 7);
+#endif // K0 > 4
+#if K0 > 8
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 8);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 9);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, A);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, B);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, C);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, D);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, E);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, F);
+#endif // K0 > 8
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
+#endif // defined(M0) && defined(K0) && defined(V0) && defined(DATA_TYPE) && defined(SRC_WIDTH)
+
+#if defined(K0) && defined(N0) && defined(H0) && defined(DATA_TYPE) && defined(SRC_HEIGHT)
+/** This OpenCL kernel reshapes the rhs input matrix. The kernel splits the input matrix in blocks
+ * of size K0xN0 and stores each one (not transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The height of the input tensor must be passed at compile time using -DSRC_HEIGHT (e.g.
+ * -DSRC_HEIGHT=16)
+ * @note The block's dimensions (K0 and N0) must be passed at compile time using -DK0 and -DN0 (e.g.
+ * -DK0=2, -DN0=2).
+ * @note The number of K0xN0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at
+ * compile time.
+ * @note Only the following values for K0, N0 and H0 are supported:
+ * N0: 2,3,4,8,16
+ * K0: 1,2,3,4,8,16
+ * H0: greater than 0
+ *
+ * @param[in] src_ptr Pointer to the source RHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source RHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source RHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source RHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source RHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_reshape_rhs_matrix_nt(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Block size
+#define BLOCK_SIZE ((K0) * (N0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (N0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (N0) * (H0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (N0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)N0 * sizeof(DATA_TYPE) + y * (uint)K0 * src_stride_y +
+ z * (uint)src_stride_z;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (y * (uint)BLOCK_SIZE * (uint)H0 * sizeof(DATA_TYPE)) +
+ ((x % (uint)H0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE)) +
+ ((x / (uint)H0) * (uint)dst_stride_y) + z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+
+  REPEAT_VAR_INIT_TO_CONST(K0, VEC_DATA_TYPE(DATA_TYPE, N0), a,
+                           0); // VEC_DATA_TYPE(DATA_TYPE, N0) a0=0, a1=0, ... a(K0-1)=0;
+
+ // Load values from the RHS matrix
+ a0 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 0 * src_stride_y));
+#if K0 > 1
+ if (y * (uint)K0 + 1 < SRC_HEIGHT)
+ {
+ a1 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 1 * src_stride_y));
+ }
+#endif // K0 > 1
+#if K0 > 2
+ if (y * (uint)K0 + 2 < SRC_HEIGHT)
+ {
+ a2 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 2 * src_stride_y));
+ }
+#endif // K0 > 2
+#if K0 > 3
+ if (y * (uint)K0 + 3 < SRC_HEIGHT)
+ {
+ a3 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 3 * src_stride_y));
+ }
+#endif // K0 > 3
+#if K0 > 4
+ if (y * (uint)K0 + 4 < SRC_HEIGHT)
+ {
+ a4 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 4 * src_stride_y));
+ }
+ if (y * (uint)K0 + 5 < SRC_HEIGHT)
+ {
+ a5 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 5 * src_stride_y));
+ }
+ if (y * (uint)K0 + 6 < SRC_HEIGHT)
+ {
+ a6 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 6 * src_stride_y));
+ }
+ if (y * (uint)K0 + 7 < SRC_HEIGHT)
+ {
+ a7 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 7 * src_stride_y));
+ }
+#endif // K0 > 4
+#if K0 > 8
+ if (y * (uint)K0 + 8 < SRC_HEIGHT)
+ {
+ a8 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 8 * src_stride_y));
+ }
+ if (y * (uint)K0 + 9 < SRC_HEIGHT)
+ {
+ a9 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 9 * src_stride_y));
+ }
+ if (y * (uint)K0 + 10 < SRC_HEIGHT)
+ {
+ aA = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 10 * src_stride_y));
+ }
+ if (y * (uint)K0 + 11 < SRC_HEIGHT)
+ {
+ aB = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 11 * src_stride_y));
+ }
+ if (y * (uint)K0 + 12 < SRC_HEIGHT)
+ {
+ aC = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 12 * src_stride_y));
+ }
+ if (y * (uint)K0 + 13 < SRC_HEIGHT)
+ {
+ aD = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 13 * src_stride_y));
+ }
+ if (y * (uint)K0 + 14 < SRC_HEIGHT)
+ {
+ aE = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 14 * src_stride_y));
+ }
+ if (y * (uint)K0 + 15 < SRC_HEIGHT)
+ {
+ aF = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 15 * src_stride_y));
+ }
+#endif // K0 > 8
+
+ // ---------------------------Store output values ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zout, 0);
+ STORE_BLOCK(K0, N0, DATA_TYPE, a, output_ptr, OUTPUT_STEP_X * sizeof(DATA_TYPE), zout);
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
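+
+// Boundary sketch (illustrative): with SRC_HEIGHT=10 and K0=4, the block at y=2 covers rows
+// {8, 9, 10, 11}; rows 10 and 11 fail the SRC_HEIGHT checks above, so a2/a3 keep their zero
+// initialization and only the valid rows contribute to the reshaped output.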
+
+#if defined(TRANSPOSE)
+/** This OpenCL kernel reshapes the rhs input matrix. The kernel splits the input matrix in blocks
+ * of size K0xN0 and stores each one (transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The height of the input tensor must be passed at compile time using -DSRC_HEIGHT (e.g.
+ * -DSRC_HEIGHT=16)
+ * @note The block's dimensions (K0 and N0) must be passed at compile time using -DK0 and -DN0 (e.g.
+ * -DK0=2, -DN0=2).
+ * @note The number of K0xN0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at
+ * compile time.
+ * @note The option -DTRANSPOSE must be passed at compile time.
+ * @note Only the following values for K0, N0 and H0 are supported:
+ * N0: 2,3,4,8,16
+ * K0: 2,3,4,8,16
+ * H0: greater than 0
+ *
+ * @param[in] src_ptr Pointer to the source RHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source RHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source RHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source RHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source RHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_reshape_rhs_matrix_t(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Block size
+#define BLOCK_SIZE ((K0) * (N0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (K0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (K0) * (H0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (K0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)N0 * sizeof(DATA_TYPE) + y * (uint)K0 * src_stride_y +
+ z * (uint)src_stride_z;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (y * (uint)BLOCK_SIZE * (uint)H0 * sizeof(DATA_TYPE)) +
+ ((x % H0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE)) +
+ ((x / (uint)H0) * (uint)dst_stride_y) + z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+ REPEAT_VAR_INIT_TO_CONST(K0, VEC_DATA_TYPE(DATA_TYPE, N0), a,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, N0) a0=0, a1=0, ... a(K0-1)=0;
+
+ // Load values from the RHS matrix
+ a0 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 0 * src_stride_y));
+ if (y * (uint)K0 + 1 < SRC_HEIGHT)
+ {
+ a1 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 1 * src_stride_y));
+ }
+#if K0 > 2
+ if (y * (uint)K0 + 2 < SRC_HEIGHT)
+ {
+ a2 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 2 * src_stride_y));
+ }
+#endif // K0 > 2
+#if K0 > 3
+ if (y * (uint)K0 + 3 < SRC_HEIGHT)
+ {
+ a3 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 3 * src_stride_y));
+ }
+#endif // K0 > 3
+#if K0 > 4
+ if (y * (uint)K0 + 4 < SRC_HEIGHT)
+ {
+ a4 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 4 * src_stride_y));
+ }
+ if (y * (uint)K0 + 5 < SRC_HEIGHT)
+ {
+ a5 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 5 * src_stride_y));
+ }
+ if (y * (uint)K0 + 6 < SRC_HEIGHT)
+ {
+ a6 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 6 * src_stride_y));
+ }
+ if (y * (uint)K0 + 7 < SRC_HEIGHT)
+ {
+ a7 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 7 * src_stride_y));
+ }
+#endif // K0 > 4
+#if K0 > 8
+ if (y * (uint)K0 + 8 < SRC_HEIGHT)
+ {
+ a8 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 8 * src_stride_y));
+ }
+ if (y * (uint)K0 + 9 < SRC_HEIGHT)
+ {
+ a9 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 9 * src_stride_y));
+ }
+ if (y * (uint)K0 + 10 < SRC_HEIGHT)
+ {
+ aA = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 10 * src_stride_y));
+ }
+ if (y * (uint)K0 + 11 < SRC_HEIGHT)
+ {
+ aB = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 11 * src_stride_y));
+ }
+ if (y * (uint)K0 + 12 < SRC_HEIGHT)
+ {
+ aC = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 12 * src_stride_y));
+ }
+ if (y * (uint)K0 + 13 < SRC_HEIGHT)
+ {
+ aD = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 13 * src_stride_y));
+ }
+ if (y * (uint)K0 + 14 < SRC_HEIGHT)
+ {
+ aE = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 14 * src_stride_y));
+ }
+ if (y * (uint)K0 + 15 < SRC_HEIGHT)
+ {
+ aF = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 15 * src_stride_y));
+ }
+#endif // K0 > 8
+
+ // ---------------------------Transpose the block ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(
+ N0, VEC_DATA_TYPE(DATA_TYPE, K0), res,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, K0) res0=0, res1=0, res2=0,... res(N0-1)=0;
+
+#if K0 == 2
+ // This part computes the following transpositions:
+ // 2x2 -> 2x2
+ // 2x4 -> 4x2
+ // 2x8 -> 8x2
+ // 2x16 -> 16x2
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF);
+#endif // N0 > 8
+
+#elif K0 == 3 // K0 == 3
+ // This part computes the following transpositions:
+ // 3x2 -> 2x3
+ // 3x4 -> 4x3
+ // 3x8 -> 8x3
+ // 3x16 -> 16x3
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF);
+#endif // N0 > 8
+
+#elif K0 == 4 // K0 == 4
+ // This part computes the following transpositions:
+ // 4x2 -> 2x4
+ // 4x4 -> 4x4
+ // 4x8 -> 8x4
+ // 4x16 -> 16x4
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0, a3.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1, a3.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2, a3.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3, a3.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4, a3.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5, a3.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6, a3.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7, a3.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8, a3.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9, a3.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA, a3.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB, a3.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC, a3.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD, a3.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE, a3.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF, a3.sF);
+#endif // N0 > 8
+
+#elif K0 == 8 // K0 == 8
+ // This part computes the following transpositions:
+ // 8x2 -> 2x8
+ // 8x4 -> 4x8
+ // 8x8 -> 8x8
+ // 8x16 -> 16x8
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0, a3.s0, a4.s0, a5.s0, a6.s0, a7.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1, a3.s1, a4.s1, a5.s1, a6.s1, a7.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2, a3.s2, a4.s2, a5.s2, a6.s2, a7.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3, a3.s3, a4.s3, a5.s3, a6.s3, a7.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4, a3.s4, a4.s4, a5.s4, a6.s4, a7.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5, a3.s5, a4.s5, a5.s5, a6.s5, a7.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6, a3.s6, a4.s6, a5.s6, a6.s6, a7.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7, a3.s7, a4.s7, a5.s7, a6.s7, a7.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8, a3.s8, a4.s8, a5.s8, a6.s8, a7.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9, a3.s9, a4.s9, a5.s9, a6.s9, a7.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA, a3.sA, a4.sA, a5.sA, a6.sA, a7.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB, a3.sB, a4.sB, a5.sB, a6.sB, a7.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC, a3.sC, a4.sC, a5.sC, a6.sC, a7.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD, a3.sD, a4.sD, a5.sD, a6.sD, a7.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE, a3.sE, a4.sE, a5.sE, a6.sE, a7.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF, a3.sF, a4.sF, a5.sF, a6.sF, a7.sF);
+#endif // N0 > 8
+
+#elif K0 == 16 // K0 == 16
+
+ // This part computes the following transpositions:
+ // 16x2 -> 2x16
+ // 16x4 -> 4x16
+ // 16x8 -> 8x16
+ // 16x16 -> 16x16
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0, a3.s0, a4.s0, a5.s0, a6.s0, a7.s0,
+ a8.s0, a9.s0, aA.s0, aB.s0, aC.s0, aD.s0, aE.s0, aF.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1, a3.s1, a4.s1, a5.s1, a6.s1, a7.s1,
+ a8.s1, a9.s1, aA.s1, aB.s1, aC.s1, aD.s1, aE.s1, aF.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2, a3.s2, a4.s2, a5.s2, a6.s2, a7.s2,
+ a8.s2, a9.s2, aA.s2, aB.s2, aC.s2, aD.s2, aE.s2, aF.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3, a3.s3, a4.s3, a5.s3, a6.s3, a7.s3,
+ a8.s3, a9.s3, aA.s3, aB.s3, aC.s3, aD.s3, aE.s3, aF.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4, a3.s4, a4.s4, a5.s4, a6.s4, a7.s4,
+ a8.s4, a9.s4, aA.s4, aB.s4, aC.s4, aD.s4, aE.s4, aF.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5, a3.s5, a4.s5, a5.s5, a6.s5, a7.s5,
+ a8.s5, a9.s5, aA.s5, aB.s5, aC.s5, aD.s5, aE.s5, aF.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6, a3.s6, a4.s6, a5.s6, a6.s6, a7.s6,
+ a8.s6, a9.s6, aA.s6, aB.s6, aC.s6, aD.s6, aE.s6, aF.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7, a3.s7, a4.s7, a5.s7, a6.s7, a7.s7,
+ a8.s7, a9.s7, aA.s7, aB.s7, aC.s7, aD.s7, aE.s7, aF.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8, a3.s8, a4.s8, a5.s8, a6.s8, a7.s8,
+ a8.s8, a9.s8, aA.s8, aB.s8, aC.s8, aD.s8, aE.s8, aF.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9, a3.s9, a4.s9, a5.s9, a6.s9, a7.s9,
+ a8.s9, a9.s9, aA.s9, aB.s9, aC.s9, aD.s9, aE.s9, aF.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA, a3.sA, a4.sA, a5.sA, a6.sA, a7.sA,
+ a8.sA, a9.sA, aA.sA, aB.sA, aC.sA, aD.sA, aE.sA, aF.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB, a3.sB, a4.sB, a5.sB, a6.sB, a7.sB,
+ a8.sB, a9.sB, aA.sB, aB.sB, aC.sB, aD.sB, aE.sB, aF.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC, a3.sC, a4.sC, a5.sC, a6.sC, a7.sC,
+ a8.sC, a9.sC, aA.sC, aB.sC, aC.sC, aD.sC, aE.sC, aF.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD, a3.sD, a4.sD, a5.sD, a6.sD, a7.sD,
+ a8.sD, a9.sD, aA.sD, aB.sD, aC.sD, aD.sD, aE.sD, aF.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE, a3.sE, a4.sE, a5.sE, a6.sE, a7.sE,
+ a8.sE, a9.sE, aA.sE, aB.sE, aC.sE, aD.sE, aE.sE, aF.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF, a3.sF, a4.sF, a5.sF, a6.sF, a7.sF,
+ a8.sF, a9.sF, aA.sF, aB.sF, aC.sF, aD.sF, aE.sF, aF.sF);
+#endif // N0 > 8
+
+#else // K0 not supported
+#error "K0 value not supported"
+#endif // K0 conditions
+
+ // ---------------------------Store the output values ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zout, 0);
+ STORE_BLOCK(N0, K0, DATA_TYPE, res, output_ptr, OUTPUT_STEP_X * sizeof(DATA_TYPE), zout);
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
+#endif // defined(TRANSPOSE)
+#endif // defined(K0) && defined(N0) && defined(H0) && defined(DATA_TYPE) && defined(SRC_HEIGHT)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(DATA_TYPE) && \
+ defined(M) && defined(N) && defined(K)
+
+#define CONCAT(a, b) a##b
+
+#define ARM_DOT1(a, b, c) ({ c = fma(a, b, c); })
+#define ARM_DOT2(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ })
+#define ARM_DOT3(a, b, c) \
+ ({ \
+ ARM_DOT2(a, b, c); \
+ c = fma((a.s2), (b.s2), c); \
+ })
+#define ARM_DOT4(a, b, c) \
+ ({ \
+ ARM_DOT3(a, b, c); \
+ c = fma((a.s3), (b.s3), c); \
+ })
+#define ARM_DOT8(a, b, c) \
+ ({ \
+ ARM_DOT4((a.lo), (b.lo), c); \
+ ARM_DOT4((a.hi), (b.hi), c); \
+ })
+#define ARM_DOT16(a, b, c) \
+ ({ \
+ ARM_DOT8((a.lo), (b.lo), c); \
+ ARM_DOT8((a.hi), (b.hi), c); \
+ })
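+
+// Expansion sketch (illustrative, float4 operands): ARM_DOT4(a, b, c) unrolls to
+//   c = fma(a.s0, b.s0, c); c = fma(a.s1, b.s1, c);
+//   c = fma(a.s2, b.s2, c); c = fma(a.s3, b.s3, c);
+// i.e. it accumulates dot(a, b) into the scalar c; ARM_DOT8/ARM_DOT16 recurse on the .lo/.hi
+// halves of wider vectors.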
+
+#if N0 == 2
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ })
+#elif N0 == 3 // N0 == 3
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ })
+#elif N0 == 4 // N0 == 4
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##3), (c.s3)); \
+ })
+#elif N0 == 8 // N0 == 8
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##3), (c.s3)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##4), (c.s4)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##5), (c.s5)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##6), (c.s6)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##7), (c.s7)); \
+ })
+#elif N0 == 16 // N0 == 16
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##3), (c.s3)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##4), (c.s4)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##5), (c.s5)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##6), (c.s6)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##7), (c.s7)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##8), (c.s8)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##9), (c.s9)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##A), (c.sA)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##B), (c.sB)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##C), (c.sC)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##D), (c.sD)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##E), (c.sE)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##F), (c.sF)); \
+ })
+#else // N0 not supported
+#error "N0 value not supported"
+#endif // N0 conditions
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS is reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is transposed
+ *
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions (M, N and K) must be passed at compile time using -DM, -DN and -DK
+ * (e.g. -DM=52, -DN=30 and -DK=90)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (e.g. -DK=64)
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (e.g. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (e.g. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ *       -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should
+ * also be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS matrix. Supported data type:
+ * F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS matrix in X dimension (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS matrix in Y dimension (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS matrix in Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix
+ * in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS reshaped matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+  // The plane (zlhs) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+ for (; i <= (K - K0); i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS reshaped matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X * sizeof(DATA_TYPE), zero);
+
+ // Accumulate
+ ARM_DOT_K0XN0(K0, a0, b, c0);
+#if M0 > 1
+ ARM_DOT_K0XN0(K0, a1, b, c1);
+#endif // M0 > 1
+#if M0 > 2
+ ARM_DOT_K0XN0(K0, a2, b, c2);
+#endif // M0 > 2
+#if M0 > 3
+ ARM_DOT_K0XN0(K0, a3, b, c3);
+#endif // M0 > 3
+#if M0 > 4
+ ARM_DOT_K0XN0(K0, a4, b, c4);
+#endif // M0 > 4
+#if M0 > 5
+ ARM_DOT_K0XN0(K0, a5, b, c5);
+#endif // M0 > 5
+#if M0 > 6
+ ARM_DOT_K0XN0(K0, a6, b, c6);
+#endif // M0 > 6
+#if M0 > 7
+ ARM_DOT_K0XN0(K0, a7, b, c7);
+#endif // M0 > 7
+
+ lhs_offset += K0 * sizeof(DATA_TYPE);
+ rhs_offset += (N0 * RHS_STEP_X * RHS_STEP_LOOP) * sizeof(DATA_TYPE);
+ }
+
+ // Left-over accumulations
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, 1, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS reshaped matrix
+ LOAD_BLOCK(N0, 1, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X * sizeof(DATA_TYPE), zero);
+
+ // Accumulate
+ ARM_DOT_K0XN0(1, a0, b, c0);
+#if M0 > 1
+ ARM_DOT_K0XN0(1, a1, b, c1);
+#endif // M0 > 1
+#if M0 > 2
+ ARM_DOT_K0XN0(1, a2, b, c2);
+#endif // M0 > 2
+#if M0 > 3
+ ARM_DOT_K0XN0(1, a3, b, c3);
+#endif // M0 > 3
+#if M0 > 4
+ ARM_DOT_K0XN0(1, a4, b, c4);
+#endif // M0 > 4
+#if M0 > 5
+ ARM_DOT_K0XN0(1, a5, b, c5);
+#endif // M0 > 5
+#if M0 > 6
+ ARM_DOT_K0XN0(1, a6, b, c6);
+#endif // M0 > 6
+#if M0 > 7
+ ARM_DOT_K0XN0(1, a7, b, c7);
+#endif // M0 > 7
+
+ lhs_offset += sizeof(DATA_TYPE);
+ rhs_offset += sizeof(DATA_TYPE);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+  // The plane (zout) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(M0, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+
+#define VFMA(a, b, c) ({ c = fma(a, b, c); })
+
+#if M0 == 1
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ })
+#elif M0 == 2 // M0 == 2
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ })
+#elif M0 == 3 // M0 == 3
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ })
+#elif M0 == 4 // M0 == 4
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ })
+#elif M0 == 5 // M0 == 5
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ })
+#elif M0 == 6 // M0 == 6
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ })
+#elif M0 == 7 // M0 == 7
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ })
+#elif M0 == 8 // M0 == 8
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##7).s##i), b, (c##7)); \
+ })
+#else // M0 not supported
+#error "M0 not supported"
+#endif // M0 not supported
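+
+// Illustrative expansion (a sketch, not generated by the preprocessor here): assuming
+// DATA_TYPE=float, M0=2 and N0=4, a call such as LD_RHS_VFMA_M0xN0(2, a, c) is roughly
+// equivalent to:
+//
+//   float4 b = vload4(0, (__global float *)(rhs_ptr + rhs_offset +
+//                                           0x2 * RHS_STEP_X * sizeof(float)));
+//   c0 = fma((float4)(a0.s2), b, c0); // broadcast lane 2 of LHS row 0 across the N0 columns
+//   c1 = fma((float4)(a1.s2), b, c1); // broadcast lane 2 of LHS row 1 across the N0 columns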
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS is reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is NOT transposed
+ *
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions (M, N and K) must be passed at compile time using -DM, -DN and -DK
+ * (e.g. -DM=52, -DN=30 and -DK=90).
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (e.g. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (e.g. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should
+ * also be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS matrix. Supported data type:
+ * F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS matrix in X dimension (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS matrix in Y dimension (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS matrix in Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix
+ * in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
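+// Example (illustrative only): with F32 data and the 52x30x90 GEMM mentioned in the notes above, a
+// plausible set of build options for this kernel would be
+//   -DDATA_TYPE=float -DM=52 -DN=30 -DK=90 -DM0=4 -DN0=4 -DK0=4 -DH0=2
+// optionally extended with -DRHS_INTERLEAVE, -DALPHA/-DBETA and -DACTIVATION_TYPE as described
+// above; the concrete values are normally chosen by the host code that configures this GEMM.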
+__kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (N0)
+#define RHS_STEP_X ((N0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (N0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS reshaped matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zin, 0); // uint zin0=0,zin1=0,zin2=0,... zin7=0;
+  REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0); // uint zero0=0,zero1=0,zero2=0,... zero15=0;
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+
+ // The plane (zin) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zin, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+  REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE, N0), c,
+                           0); // VEC_DATA_TYPE(DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+ for (; i <= (K - K0); i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zin);
+
+ LD_RHS_VFMA_M0xN0(0, a, c);
+ LD_RHS_VFMA_M0xN0(1, a, c);
+#if K0 > 2
+ LD_RHS_VFMA_M0xN0(2, a, c);
+#endif // K0 > 2
+#if K0 > 3
+ LD_RHS_VFMA_M0xN0(3, a, c);
+#endif // K0 > 3
+#if K0 > 4
+ LD_RHS_VFMA_M0xN0(4, a, c);
+ LD_RHS_VFMA_M0xN0(5, a, c);
+ LD_RHS_VFMA_M0xN0(6, a, c);
+ LD_RHS_VFMA_M0xN0(7, a, c);
+#endif // K0 > 4
+#if K0 > 8
+ LD_RHS_VFMA_M0xN0(8, a, c);
+ LD_RHS_VFMA_M0xN0(9, a, c);
+ LD_RHS_VFMA_M0xN0(A, a, c);
+ LD_RHS_VFMA_M0xN0(B, a, c);
+ LD_RHS_VFMA_M0xN0(C, a, c);
+ LD_RHS_VFMA_M0xN0(D, a, c);
+ LD_RHS_VFMA_M0xN0(E, a, c);
+ LD_RHS_VFMA_M0xN0(F, a, c);
+#endif // K0 > 8
+
+ lhs_offset += K0 * sizeof(DATA_TYPE);
+ rhs_offset += K0 * RHS_STEP_X * RHS_STEP_LOOP * sizeof(DATA_TYPE);
+ }
+
+ // Left-over accumulations
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a0 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 0 * lhs_stride_y + zin0));
+#if M0 > 1
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a1 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 1 * lhs_stride_y + zin1));
+#endif // M0 > 1
+#if M0 > 2
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a2 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 2 * lhs_stride_y + zin2));
+#endif // M0 > 2
+#if M0 > 3
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a3 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 3 * lhs_stride_y + zin3));
+#endif // M0 > 3
+#if M0 > 4
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a4 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 4 * lhs_stride_y + zin4));
+#endif // M0 > 4
+#if M0 > 5
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a5 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 5 * lhs_stride_y + zin5));
+#endif // M0 > 5
+#if M0 > 6
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a6 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 6 * lhs_stride_y + zin6));
+#endif // M0 > 6
+#if M0 > 7
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a7 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 7 * lhs_stride_y + zin7));
+#endif // M0 > 7
+
+ LD_RHS_VFMA_M0xN0(0, a, c);
+
+ lhs_offset += sizeof(DATA_TYPE);
+ rhs_offset += RHS_STEP_X * sizeof(DATA_TYPE);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(M0, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(DATA_TYPE) &&
+ // defined(M) && defined(N) && defined(K)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && \
+ defined(DATA_TYPE) && defined(DATA_TYPE_ACCUMULATOR) && defined(M) && defined(N)
+
+#if defined(MIXED_PRECISION)
+#if K0 == 2
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ })
+#elif K0 == 3 // K0 == 3
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ })
+#elif K0 == 4 // K0 == 4
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ c += a.s3 * b.s3; \
+ })
+#elif K0 == 8 // K0 == 8
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ c += a.s3 * b.s3; \
+ c += a.s4 * b.s4; \
+ c += a.s5 * b.s5; \
+ c += a.s6 * b.s6; \
+ c += a.s7 * b.s7; \
+ })
+#elif K0 == 16 // K0 == 16
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ c += a.s3 * b.s3; \
+ c += a.s4 * b.s4; \
+ c += a.s5 * b.s5; \
+ c += a.s6 * b.s6; \
+ c += a.s7 * b.s7; \
+ c += a.s8 * b.s8; \
+ c += a.s9 * b.s9; \
+ c += a.sA * b.sA; \
+ c += a.sB * b.sB; \
+ c += a.sC * b.sC; \
+ c += a.sD * b.sD; \
+ c += a.sE * b.sE; \
+ c += a.sF * b.sF; \
+ })
+#else // K0 not supported
+#error "K0 value not supported"
+#endif // K0 conditions
+#else // defined(MIXED_PRECISION)
+#if K0 == 2
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ })
+#elif K0 == 3 // K0 == 3
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ })
+#elif K0 == 4 // K0 == 4
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ c = fma(a.s3, b.s3, c); \
+ })
+#elif K0 == 8 // K0 == 8
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ c = fma(a.s3, b.s3, c); \
+ c = fma(a.s4, b.s4, c); \
+ c = fma(a.s5, b.s5, c); \
+ c = fma(a.s6, b.s6, c); \
+ c = fma(a.s7, b.s7, c); \
+ })
+#elif K0 == 16 // K0 == 16
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ c = fma(a.s3, b.s3, c); \
+ c = fma(a.s4, b.s4, c); \
+ c = fma(a.s5, b.s5, c); \
+ c = fma(a.s6, b.s6, c); \
+ c = fma(a.s7, b.s7, c); \
+ c = fma(a.s8, b.s8, c); \
+ c = fma(a.s9, b.s9, c); \
+ c = fma(a.sA, b.sA, c); \
+ c = fma(a.sB, b.sB, c); \
+ c = fma(a.sC, b.sC, c); \
+ c = fma(a.sD, b.sD, c); \
+ c = fma(a.sE, b.sE, c); \
+ c = fma(a.sF, b.sF, c); \
+ })
+#else // K0 not supported
+#error "K0 value not supported"
+#endif // K0 conditions
+#endif // defined(MIXED_PRECISION)
+
+#if N0 == 2
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ })
+#elif N0 == 3 // N0 == 3
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ })
+#elif N0 == 4 // N0 == 4
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ ARM_DOT_K0((a), (b##3), (c.s3)); \
+ })
+#elif N0 == 8 // N0 == 8
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ ARM_DOT_K0((a), (b##3), (c.s3)); \
+ ARM_DOT_K0((a), (b##4), (c.s4)); \
+ ARM_DOT_K0((a), (b##5), (c.s5)); \
+ ARM_DOT_K0((a), (b##6), (c.s6)); \
+ ARM_DOT_K0((a), (b##7), (c.s7)); \
+ })
+#elif N0 == 16 // N0 == 16
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ ARM_DOT_K0((a), (b##3), (c.s3)); \
+ ARM_DOT_K0((a), (b##4), (c.s4)); \
+ ARM_DOT_K0((a), (b##5), (c.s5)); \
+ ARM_DOT_K0((a), (b##6), (c.s6)); \
+ ARM_DOT_K0((a), (b##7), (c.s7)); \
+ ARM_DOT_K0((a), (b##8), (c.s8)); \
+ ARM_DOT_K0((a), (b##9), (c.s9)); \
+ ARM_DOT_K0((a), (b##A), (c.sA)); \
+ ARM_DOT_K0((a), (b##B), (c.sB)); \
+ ARM_DOT_K0((a), (b##C), (c.sC)); \
+ ARM_DOT_K0((a), (b##D), (c.sD)); \
+ ARM_DOT_K0((a), (b##E), (c.sE)); \
+ ARM_DOT_K0((a), (b##F), (c.sF)); \
+ })
+#else // N0 not supported
+#error "N0 value not supported"
+#endif // N0 conditions
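+
+// Illustrative expansion (a sketch, not generated by the preprocessor here): assuming K0=2, N0=2
+// and no MIXED_PRECISION, ARM_DOT_K0XN0(a0, b, c0) accumulates two 2-element dot products:
+//
+//   c0.s0 = fma(a0.s0, b0.s0, c0.s0); c0.s0 = fma(a0.s1, b0.s1, c0.s0);
+//   c0.s1 = fma(a0.s0, b1.s0, c0.s1); c0.s1 = fma(a0.s1, b1.s1, c0.s1);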
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix must be reshaped with @ref CLGEMMReshapeLHSMatrixKernel and the M0xK0 block must
+ * NOT be transposed. The RHS matrix must be reshaped with @ref CLGEMMReshapeRHSMatrixKernel and
+ * the K0xN0 block must be transposed.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The data type used for the accumulators must be passed at compile time using
+ * -DDATA_TYPE_ACCUMULATOR (e.g. -DDATA_TYPE_ACCUMULATOR=float)
+ * @note The F16 computation also supports mixed precision through the option -DMIXED_PRECISION
+ * passed at compile time. If enabled, DATA_TYPE_ACCUMULATOR should be set to float
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions M and N must be passed at compile time using -DM and -DN (e.g. -DM=52
+ * and -DN=90).
+ * @note The block's dimensions used for reshaping the LHS matrix and the RHS matrix (M0, N0 and K0)
+ * must be passed at compile time using -DM0, -DN0 and -DK0 (e.g. -DM0=4, -DN0=8, -DK0=4).
+ * @note The number of M0xK0 vertical blocks stored on the same output row of the reshaped LHS
+ * matrix must be passed at compile time using -DV0 (e.g. -DV0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the M0xK0 blocks in the reshaped LHS matrix have been interleaved, the option
+ * -DLHS_INTERLEAVE must be passed at compile time.
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - V0 >= 1
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should
+ * also be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix NOT reshaped
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS
+ * reshaped matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] k Number of columns in LHS matrix and rows in RHS
+ * matrix not reshaped.
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
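+// Example (illustrative only): for an F16 GEMM with float accumulators, this kernel could be built
+// with something like
+//   -DDATA_TYPE=half -DDATA_TYPE_ACCUMULATOR=float -DMIXED_PRECISION -DM=52 -DN=90
+//   -DM0=4 -DN0=8 -DK0=4 -DV0=2 -DH0=2
+// where the block and interleave factors must match the ones used when reshaping LHS and RHS.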
+__kernel void gemm_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint k, uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define LHS_BLOCK_SIZE ((K0) * (M0))
+
+#if defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (K0)
+#define LHS_STEP_X ((K0) * (V0))
+#define LHS_STEP_LOOP (1)
+#else // defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (LHS_BLOCK_SIZE)
+#define LHS_STEP_X (K0)
+#define LHS_STEP_LOOP (V0)
+#endif // defined(LHS_INTERLEAVE)
+
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((get_global_id(0) * N0 >= N) || (get_global_id(1) * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ __global uchar *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes +
+ (get_global_id(1) % V0) * (uint)LHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (get_global_id(1) / V0) * (uint)lhs_stride_y +
+ (get_global_id(2) * lhs_stride_z);
+
+ // Compute RHS matrix address
+ __global uchar *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes +
+ (get_global_id(0) % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (get_global_id(0) / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_addr += (get_global_id(2) % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_addr += get_global_id(2) * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0), c, 0);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0);
+
+ for (int i = 0; i < k; i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_addr, 0, LHS_STEP_X * sizeof(DATA_TYPE), zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_addr, 0, RHS_STEP_X * sizeof(DATA_TYPE), zero);
+
+ // Accumulate
+ ARM_DOT_K0XN0(a0, b, c0);
+#if M0 > 1
+ ARM_DOT_K0XN0(a1, b, c1);
+#endif // M0 > 1
+#if M0 > 2
+ ARM_DOT_K0XN0(a2, b, c2);
+#endif // M0 > 2
+#if M0 > 3
+ ARM_DOT_K0XN0(a3, b, c3);
+#endif // M0 > 3
+#if M0 > 4
+ ARM_DOT_K0XN0(a4, b, c4);
+#endif // M0 > 4
+#if M0 > 5
+ ARM_DOT_K0XN0(a5, b, c5);
+#endif // M0 > 5
+#if M0 > 6
+ ARM_DOT_K0XN0(a6, b, c6);
+#endif // M0 > 6
+#if M0 > 7
+ ARM_DOT_K0XN0(a7, b, c7);
+#endif // M0 > 7
+
+ lhs_addr += (M0 * LHS_STEP_X * LHS_STEP_LOOP) * sizeof(DATA_TYPE);
+ rhs_addr += (N0 * RHS_STEP_X * RHS_STEP_LOOP) * sizeof(DATA_TYPE);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+  // The plane (zout) is calculated dividing M (get_global_id(1) * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, get_global_id(1), HEIGHT_GEMM3D, DEPTH_GEMM3D,
+ dst_cross_plane_pad, dst_stride_y);
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += get_global_id(2) * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += get_global_id(2) * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(1, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK_BROADCAST(M0, c, bias_hp0);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+#endif // defined(MIXED_PRECISION)
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(M0, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK(M0, c, bias_hp);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK(M0, c, bias);
+#endif // defined(MIXED_PRECISION)
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+#if defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, c, A_VAL, B_VAL);
+#else // defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(MIXED_PRECISION)
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+#if defined(MIXED_PRECISION)
+ CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#else // defined(MIXED_PRECISION)
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#endif // defined(MIXED_PRECISION)
+
+#undef LHS_BLOCK_SIZE
+#undef LHS_OFFSET_X
+#undef LHS_STEP_X
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+
+#if defined(LHS_TRANSPOSE)
+
+#define VTYPE(TYPE, SIZE) VEC_DATA_TYPE(TYPE, SIZE)
+
+#if defined(MIXED_PRECISION)
+
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+#define ARM_VFMA(N0, a, b, c) \
+ c += (CONVERT(a, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0))) * \
+ (CONVERT(b, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0)));
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+#define ARM_VFMA(N0, a, b, c) \
+ c = fma((CONVERT(a, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0))), \
+ (CONVERT(b, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0))), (c));
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+#else // defined(MIXED_PRECISION)
+
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+#define ARM_VFMA(N0, a, b, c) c += (a) * (b);
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+#define ARM_VFMA(N0, a, b, c) c = fma((a), (b), (c));
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+#endif // defined(MIXED_PRECISION)
+
+#define ARM_VVM_T_NT_1xN0x1(N0, TYPE, a, b, C) ({ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a), b, (C##0)); })
+#define ARM_VVM_T_NT_2xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s0), b, (C##0)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s1), b, (C##1)); \
+ })
+#define ARM_VVM_T_NT_3xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VVM_T_NT_2xN0x1(N0, TYPE, a, b, C); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s2), b, (C##2)); \
+ })
+#define ARM_VVM_T_NT_4xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VVM_T_NT_3xN0x1(N0, TYPE, a, b, C); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s3), b, (C##3)); \
+ })
+#define ARM_VVM_T_NT_8xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VVM_T_NT_4xN0x1(N0, TYPE, a, b, C); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s4), b, (C##4)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s5), b, (C##5)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s6), b, (C##6)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s7), b, (C##7)); \
+ })
+
+// Factory macro for the column-vector (transposed) by row-vector (not transposed) multiplication.
+// K0 = 1. a is the column-vector (transposed), b is the row-vector (not transposed) and C is the
+// output matrix. Lower case denotes a vector (a, b); upper case denotes a matrix (C).
+#define ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, a, b, C) ARM_VVM_T_NT_##M0##xN0x1(N0, TYPE, a, b, C)
+
+#define ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, A, B, C) \
+ ({ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##0), (B##0), C); })
+#define ARM_MM_T_NT_M0xN0x2(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##1), (B##1), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x3(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x2(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##2), (B##2), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x4(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x3(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##3), (B##3), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x8(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x4(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##4), (B##4), C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##5), (B##5), C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##6), (B##6), C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##7), (B##7), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x16(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x8(M0, N0, TYPE, A, B, C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##8), (B##8), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##9), (B##9), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##A), (B##A), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##B), (B##B), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##C), (B##C), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##D), (B##D), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##E), (B##E), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##F), (B##F), C); \
+ })
+
+// Factory macro for the matrix (transposed) by matrix (not transposed) multiplication.
+// The dimensions for this matrix multiplication are defined through M0, N0 and K0
+// The dimensions supported are:
+// M0: 1, 2, 3, 4, 8
+// N0: 1, 2, 3, 4, 8, 16
+// K0: 1, 2, 3, 4, 8, 16
+// This macro calls the column-vector by row-vector macro (ARM_VVM_T_NT) K0 times
+// A, B and C are matrices
+#define ARM_MM_T_NT(M0, N0, K0, TYPE, A, B, C) \
+ CONCAT(ARM_MM_T_NT_M0xN0x, K0) \
+ (M0, N0, TYPE, A, B, C)
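+
+// Illustrative expansion (a sketch, assuming DATA_TYPE=float, N0=4, no MIXED_PRECISION and a
+// non-Midgard GPU_ARCH): ARM_MM_T_NT(2, 4, 1, float, a, b, c) multiplies one transposed LHS column
+// by one RHS row and is roughly equivalent to
+//
+//   c0 = fma((float4)(a0.s0), b0, c0);
+//   c1 = fma((float4)(a0.s1), b0, c1);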
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix must be reshaped with @ref CLGEMMReshapeLHSMatrixKernel and the M0xK0 block must
+ * be transposed. The RHS matrix must be reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the
+ * K0xN0 block must NOT be transposed.
+ *
+ * @note LHS_TRANSPOSE should be passed at compile time in order to compile this OpenCL kernel (e.g.
+ * -DLHS_TRANSPOSE).
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions M and N must be passed at compile time using -DM and -DN (e.g. -DM=52
+ * and -DN=90).
+ * @note The block's dimensions used for reshaping the LHS matrix and the RHS matrix (M0, N0 and K0)
+ * must be passed at compile time using -DM0, -DN0 and -DK0 (e.g. -DM0=4, -DN0=8, -DK0=4).
+ * @note The number of M0xK0 vertical blocks stored on the same output row of the reshaped LHS
+ * matrix must be passed at compile time using -DV0 (e.g. -DV0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the M0xK0 blocks in the reshaped LHS matrix have been interleaved, the option
+ * -DLHS_INTERLEAVE must be passed at compile time.
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 2, 3, 4, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - V0 >= 1
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should
+ * also be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix NOT reshaped
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS
+ * reshaped matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] k Number of columns in LHS matrix and rows in RHS
+ * matrix not reshaped.
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
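+// Worked example (illustrative only): with M0=4, K0=4 and V0=2, each reshaped LHS block stores
+// M0 x K0 = 16 elements. If -DLHS_INTERLEAVE is defined, LHS_OFFSET_X = 4 and LHS_STEP_X = 8, so
+// successive K iterations of one block are interleaved with those of the neighbouring block;
+// otherwise LHS_OFFSET_X = 16 and LHS_STEP_X = 4, i.e. each block is stored contiguously.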
+__kernel void gemm_mm_reshaped_lhs_t_rhs_nt(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint k, uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define LHS_BLOCK_SIZE ((K0) * (M0))
+
+#if defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (M0)
+#define LHS_STEP_X ((M0) * (V0))
+#define LHS_STEP_LOOP (1)
+#else // defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (LHS_BLOCK_SIZE)
+#define LHS_STEP_X (M0)
+#define LHS_STEP_LOOP (V0)
+#endif // defined(LHS_INTERLEAVE)
+
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (N0)
+#define RHS_STEP_X ((N0) * (H0))
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (N0)
+#endif // defined(RHS_INTERLEAVE)
+
+ const uint x = get_global_id(0);
+ const uint y = get_global_id(1);
+ const uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ __global uchar *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes +
+ (y % V0) * (uint)LHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (y / V0) * (uint)lhs_stride_y + (z * lhs_stride_z);
+
+ // Compute RHS matrix address
+ __global uchar *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_addr += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_addr += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0), c, 0);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zero, 0);
+
+ __global DATA_TYPE *lhs = (__global DATA_TYPE *)(lhs_addr);
+ __global DATA_TYPE *rhs = (__global DATA_TYPE *)(rhs_addr);
+
+ for (int i = 0; i < k; i += K0)
+ {
+ VEC_DATA_TYPE(DATA_TYPE, M0)
+ a0 = VLOAD(M0)(0, lhs);
+ VEC_DATA_TYPE(DATA_TYPE, N0)
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+#if K0 > 1
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 1
+
+#if K0 > 2
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 2
+
+#if K0 > 3
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 3
+
+#if K0 > 4
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 4
+
+#if K0 > 8
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 8
+
+#ifndef LHS_INTERLEAVE
+ lhs += (M0 * K0 * (V0 - 1));
+#endif // LHS_INTERLEAVE
+
+#ifndef RHS_INTERLEAVE
+ rhs += (N0 * K0 * (H0 - 1));
+#endif // RHS_INTERLEAVE
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+  // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr =
+ bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(1, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK_BROADCAST(M0, c, bias_hp0);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+#endif // defined(MIXED_PRECISION)
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * bias_stride_y) +
+ z * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(M0, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK(M0, c, bias_hp);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK(M0, c, bias);
+#endif // defined(MIXED_PRECISION)
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+#if defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, c, A_VAL, B_VAL);
+#else // defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(MIXED_PRECISION)
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+#if defined(MIXED_PRECISION)
+ CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#else // defined(MIXED_PRECISION)
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#endif // defined(MIXED_PRECISION)
+
+#undef LHS_BLOCK_SIZE
+#undef LHS_OFFSET_X
+#undef LHS_STEP_X
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+
+#endif // defined(LHS_TRANSPOSE)
+
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) &&
+       // defined(DATA_TYPE) && defined(DATA_TYPE_ACCUMULATOR) && defined(M) && defined(N)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(DATA_TYPE)
+
+#define VFMA(a, b, c) ({ c = fma(a, b, c); })
+
+#if M0 == 1
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); })
+#elif M0 == 2 // M0 == 2
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ })
+#elif M0 == 3 // M0 == 3
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ })
+#elif M0 == 4 // M0 == 4
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ })
+#elif M0 == 5 // M0 == 5
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ })
+#elif M0 == 6 // M0 == 6
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ })
+#elif M0 == 7 // M0 == 7
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ })
+#elif M0 == 8 // M0 == 8
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##7).s##i), b, (c##7)); \
+ })
+#else // M0 not supported
+#error "M0 not supported"
+#endif // M0 not supported
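+
+// Unlike LD_RHS_VFMA_M0xN0 above, RHS_VFMA_M0xN0 takes the already loaded RHS vector b as an
+// argument. Illustrative expansion (a sketch, assuming DATA_TYPE=float, M0=2, N0=4):
+// RHS_VFMA_M0xN0(3, a, b0, c) is roughly equivalent to
+//
+//   c0 = fma((float4)(a0.s3), b0, c0);
+//   c1 = fma((float4)(a1.s3), b0, c1);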
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS matrix is NOT reshaped
+ *
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions (M, N and K) must be passed at compile time using -DM, -DN and -DK
+ * (e.g. -DM=52, -DN=30 and -DK=90)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (e.g. -DK=64)
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (e.g. -DM0=2)
+ * @note The number of K0 partial accumulations must be passed at compile time using -DK0 (e.g.,
+ * -DK0=2)
+ * @note The number of N0 columns to process must be passed at compile time using -DN0 (e.g. -DN0=2)
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should
+ * also be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS matrix. Supported data type:
+ * F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS matrix in X dimension (in bytes)
+ * @param[in] lhs_step_x lhs_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS matrix in Y dimension (in bytes)
+ * @param[in] lhs_step_y lhs_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS matrix
+ * @param[in] rhs_ptr Pointer to the RHS matrix. Supported data type:
+ * same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS matrix in X dimension (in bytes)
+ * @param[in] rhs_step_x rhs_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS matrix in Y dimension (in bytes)
+ * @param[in] rhs_step_y rhs_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                           Pointer to the destination matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS matrix in Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS matrix in Z dimension (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix
+ * in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_native(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint lhs_stride_z, uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
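+  // Work items created only by the rounded-up "dummy" dispatch have a tile origin at or beyond
+  // N or M and return early without touching memory.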
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + x * N0 * sizeof(DATA_TYPE);
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zlhs, 0);
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+ for (; i <= (K - K0); i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(K0, N0, DATA_TYPE, b, rhs_ptr, rhs_offset, rhs_stride_y, zero);
+
+ RHS_VFMA_M0xN0(0, a, b0, c);
+ RHS_VFMA_M0xN0(1, a, b1, c);
+#if K0 > 2
+ RHS_VFMA_M0xN0(2, a, b2, c);
+#endif // K0 > 2
+#if K0 > 3
+ RHS_VFMA_M0xN0(3, a, b3, c);
+#endif // K0 > 3
+#if K0 > 4
+ RHS_VFMA_M0xN0(4, a, b4, c);
+ RHS_VFMA_M0xN0(5, a, b5, c);
+ RHS_VFMA_M0xN0(6, a, b6, c);
+ RHS_VFMA_M0xN0(7, a, b7, c);
+#endif // K0 > 4
+#if K0 > 8
+ RHS_VFMA_M0xN0(8, a, b8, c);
+ RHS_VFMA_M0xN0(9, a, b9, c);
+ RHS_VFMA_M0xN0(A, a, bA, c);
+ RHS_VFMA_M0xN0(B, a, bB, c);
+ RHS_VFMA_M0xN0(C, a, bC, c);
+ RHS_VFMA_M0xN0(D, a, bD, c);
+ RHS_VFMA_M0xN0(E, a, bE, c);
+ RHS_VFMA_M0xN0(F, a, bF, c);
+#endif // K0 > 8
+
+ lhs_offset += K0 * sizeof(DATA_TYPE);
+ rhs_offset += K0 * rhs_stride_y;
+ }
+
+ // Left-over accumulations
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a0 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 0 * lhs_stride_y + zlhs0));
+#if M0 > 1
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a1 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 1 * lhs_stride_y + zlhs1));
+#endif // M0 > 1
+#if M0 > 2
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a2 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 2 * lhs_stride_y + zlhs2));
+#endif // M0 > 2
+#if M0 > 3
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a3 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 3 * lhs_stride_y + zlhs3));
+#endif // M0 > 3
+#if M0 > 4
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a4 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 4 * lhs_stride_y + zlhs4));
+#endif // M0 > 4
+#if M0 > 5
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a5 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 5 * lhs_stride_y + zlhs5));
+#endif // M0 > 5
+#if M0 > 6
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a6 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 6 * lhs_stride_y + zlhs6));
+#endif // M0 > 6
+#if M0 > 7
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a7 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 7 * lhs_stride_y + zlhs7));
+#endif // M0 > 7
+
+ VEC_DATA_TYPE(DATA_TYPE, N0)
+ b = VLOAD(N0)(0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0 * rhs_stride_y));
+ RHS_VFMA_M0xN0(0, a, b, c);
+
+ lhs_offset += sizeof(DATA_TYPE);
+ rhs_offset += rhs_stride_y;
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(M0, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(DATA_TYPE)
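+
+// Illustrative build options for gemm_mm_native (a sketch, not taken from any host code): the
+// values below satisfy the M0/N0/K0 configurations listed in the kernel documentation, e.g.
+//   -DDATA_TYPE=float -DM=64 -DN=32 -DK=128 -DM0=4 -DN0=4 -DK0=4
+// Optional defines such as -DALPHA, -DBETA, -DACTIVATION_TYPE, -DA_VAL and -DB_VAL enable the
+// scaling, bias and activation paths guarded above.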
+
+#if defined(COLS_B) && defined(MULT_TRANSPOSE1XW_WIDTH) && defined(MULT_INTERLEAVE4X4_HEIGHT)
+/** This OpenCL kernel is optimized for Midgard. It computes the matrix multiplication between
+ * matrix A reshaped (src0) and matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions must also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                           Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f32(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 4;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global float *src_addr_a = (__global float *)(src0_ptr + src0_addr_in_bytes);
+ __global float *src_addr_b = (__global float *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global float *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ float4 c0 = 0.0f;
+ float4 c1 = 0.0f;
+ float4 c2 = 0.0f;
+ float4 c3 = 0.0f;
+
+ for (; src_addr_b <= (src_end_addr_b - (int)(8 * MULT_TRANSPOSE1XW_WIDTH));
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ c0 += (float4)a0.s0 * b0;
+ c1 += (float4)a0.s1 * b0;
+ c2 += (float4)a0.s2 * b0;
+ c3 += (float4)a0.s3 * b0;
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a + 4 * MULT_INTERLEAVE4X4_HEIGHT);
+ b0 = vload4(0, src_addr_b + 4 * MULT_TRANSPOSE1XW_WIDTH);
+
+ c0 += (float4)a0.s0 * b0;
+ c1 += (float4)a0.s1 * b0;
+ c2 += (float4)a0.s2 * b0;
+ c3 += (float4)a0.s3 * b0;
+ }
+
+ for (; src_addr_b < src_end_addr_b;
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ c0 += (float4)a0.s0 * b0;
+ c1 += (float4)a0.s1 * b0;
+ c2 += (float4)a0.s2 * b0;
+ c3 += (float4)a0.s3 * b0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, float, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float));
+
+ LOAD_BLOCK(1, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, float, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x4 block
+ vstore4(c0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore4(c1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore4(c2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore4(c3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
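+
+// Worked example for the REINTERPRET_OUTPUT_AS_3D path above (illustrative numbers only): assume
+// -DHEIGHT_GEMM3D=4 -DDEPTH_GEMM3D=2, cross_plane_pad = 1 and dst_stride_y = 64 bytes. For
+// get_global_id(1) = 1 the tile covers output rows 4..7, so
+//   zout = ((uint4)(0, 1, 2, 3) + (uint4)(4)) / (uint4)4 = (uint4)(1, 1, 1, 1)
+//   zout = min(2 - 1, zout)                              = (uint4)(1, 1, 1, 1)
+//   zout *= (1 * 64)                                     = (uint4)(64, 64, 64, 64)
+// i.e. all four rows of the tile lie in plane 1 and each store skips one padded row of 64 bytes.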
+
+/** This OpenCL kernel is optimized for Bifrost and it computes the matrix multiplication between
+ * matrix A reshaped (src0) and matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions must also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                           Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f32_bifrost(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 4;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global float *src_addr_a = (__global float *)(src0_ptr + src0_addr_in_bytes);
+ __global float *src_addr_b = (__global float *)(src1_ptr + src1_addr_in_bytes);
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ float4 c0 = 0.0f;
+ float4 c1 = 0.0f;
+ float4 c2 = 0.0f;
+ float4 c3 = 0.0f;
+
+#define COLS_MTX_B (COLS_B / (4 * MULT_TRANSPOSE1XW_WIDTH))
+
+ int i = 0;
+ for (; i <= (int)(COLS_MTX_B - 4); i += 4)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+ }
+
+ for (; i < (int)(COLS_MTX_B); ++i)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, float, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float));
+
+ LOAD_BLOCK(1, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, float, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x4 block
+ vstore4(c0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore4(c1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore4(c2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore4(c3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
+
+// Undefine local defines
+#undef COLS_MTX_B
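+
+// Loop-count sketch for gemm_mm_interleaved_transposed_f32_bifrost (illustrative numbers only):
+// with -DCOLS_B=64 and -DMULT_TRANSPOSE1XW_WIDTH=2, COLS_MTX_B = 64 / (4 * 2) = 8, so the
+// 4x-unrolled main loop runs for i = 0 and i = 4 (four reshaped-B rows of 4 floats per iteration)
+// and the single-row tail loop has nothing left to do.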
+
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+/** This OpenCL kernel computes the matrix multiplication between matrix A reshaped (src0) and
+ * matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions must also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                           Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f16(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 8;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global half *src_addr_a = (__global half *)(src0_ptr + src0_addr_in_bytes);
+ __global half *src_addr_b = (__global half *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global half *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ half8 c0 = 0.0f;
+ half8 c1 = 0.0f;
+ half8 c2 = 0.0f;
+ half8 c3 = 0.0f;
+
+ for (; src_addr_b <= (src_end_addr_b - (int)(16 * MULT_TRANSPOSE1XW_WIDTH));
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 16 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ c0 += (half8)a0.s0 * b0;
+ c1 += (half8)a0.s1 * b0;
+ c2 += (half8)a0.s2 * b0;
+ c3 += (half8)a0.s3 * b0;
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a + 4 * MULT_INTERLEAVE4X4_HEIGHT);
+ b0 = vload8(0, src_addr_b + 8 * MULT_TRANSPOSE1XW_WIDTH);
+
+ c0 += (half8)a0.s0 * b0;
+ c1 += (half8)a0.s1 * b0;
+ c2 += (half8)a0.s2 * b0;
+ c3 += (half8)a0.s3 * b0;
+ }
+
+ for (; src_addr_b < src_end_addr_b;
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ c0 += (half8)a0.s0 * b0;
+ c1 += (half8)a0.s1 * b0;
+ c2 += (half8)a0.s2 * b0;
+ c3 += (half8)a0.s3 * b0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, half, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x8 block
+ vstore8(c0, 0, (__global half *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore8(c1, 0, (__global half *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore8(c2, 0, (__global half *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore8(c3, 0, (__global half *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
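+
+// Stride sketch for the FP16 kernel above (derived from its loads, no extra configuration): each
+// main-loop iteration issues two half4 loads from the interleaved A block and two half8 loads
+// from the transposed B block, hence the pointer advances of 8 * MULT_INTERLEAVE4X4_HEIGHT and
+// 16 * MULT_TRANSPOSE1XW_WIDTH; the tail loop advances by half of each.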
+
+/** This OpenCL kernel computes the matrix multiplication between matrix A reshaped (src0) and
+ * matrix B reshaped (src1) while accumulating the result in a 32-bit floating point variable.
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions must also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                           Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f16_acc32(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 8;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global half *src_addr_a = (__global half *)(src0_ptr + src0_addr_in_bytes);
+ __global half *src_addr_b = (__global half *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global half *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ float8 c0 = 0.0f;
+ float8 c1 = 0.0f;
+ float8 c2 = 0.0f;
+ float8 c3 = 0.0f;
+
+ for (; src_addr_b <= (src_end_addr_b - (int)(16 * MULT_TRANSPOSE1XW_WIDTH));
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 16 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = convert_float4(vload4(0, src_addr_a));
+ float8 b0 = convert_float8(vload8(0, src_addr_b));
+
+ c0 += (float8)a0.s0 * b0;
+ c1 += (float8)a0.s1 * b0;
+ c2 += (float8)a0.s2 * b0;
+ c3 += (float8)a0.s3 * b0;
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = convert_float4(vload4(0, src_addr_a + 4 * MULT_INTERLEAVE4X4_HEIGHT));
+ b0 = convert_float8(vload8(0, src_addr_b + 8 * MULT_TRANSPOSE1XW_WIDTH));
+
+ c0 += (float8)a0.s0 * b0;
+ c1 += (float8)a0.s1 * b0;
+ c2 += (float8)a0.s2 * b0;
+ c3 += (float8)a0.s3 * b0;
+ }
+
+ for (; src_addr_b < src_end_addr_b;
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = convert_float4(vload4(0, src_addr_a));
+ float8 b0 = convert_float8(vload8(0, src_addr_b));
+
+ c0 += (float8)a0.s0 * b0;
+ c1 += (float8)a0.s1 * b0;
+ c2 += (float8)a0.s2 * b0;
+ c3 += (float8)a0.s3 * b0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, float, c, ALPHA);
+#endif // defined(ALPHA)
+
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias_f0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+ float8 bias_f1 = convert_float8(bias1);
+ float8 bias_f2 = convert_float8(bias2);
+ float8 bias_f3 = convert_float8(bias3);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias_f);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+ half8 c_h0 = convert_half8(c0);
+ half8 c_h1 = convert_half8(c1);
+ half8 c_h2 = convert_half8(c2);
+ half8 c_h3 = convert_half8(c3);
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c_h, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x8 block
+ vstore8(c_h0, 0, (__global half *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore8(c_h1, 0, (__global half *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore8(c_h2, 0, (__global half *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore8(c_h3, 0, (__global half *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
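+
+// Precision note for the _acc32 variant above (a summary of the code, not a behavioural change):
+// A and B tiles are loaded as half, widened with convert_float4/convert_float8, accumulated and
+// optionally scaled/biased in float8, and converted back to half8 only just before the activation
+// and the final vstore8, so no intermediate rounding to FP16 happens inside the K loop.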
+
+/** This OpenCL kernel optimized for Bifrost architectures computes the matrix multiplication
+ * between matrix A reshaped (src0) and matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must also be
+ * passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported
+ * data types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f16_bifrost(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 8;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global half *src_addr_a = (__global half *)(src0_ptr + src0_addr_in_bytes);
+ __global half *src_addr_b = (__global half *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global half *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ half8 c0 = 0.0f;
+ half8 c1 = 0.0f;
+ half8 c2 = 0.0f;
+ half8 c3 = 0.0f;
+
+#define COLS_MTX_B (COLS_B / (8 * MULT_TRANSPOSE1XW_WIDTH))
+
+ int i = 0;
+ for (; i <= (int)(COLS_MTX_B - 4); i += 4)
+ {
+#if MULT_INTERLEAVE4X4_HEIGHT == 1
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half8 a0 = vload8(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix B (transposed)
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s4, b0, c0);
+ c1 = fma((half8)a0.s5, b0, c1);
+ c2 = fma((half8)a0.s6, b0, c2);
+ c3 = fma((half8)a0.s7, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload8(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix B (transposed)
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s4, b0, c0);
+ c1 = fma((half8)a0.s5, b0, c1);
+ c2 = fma((half8)a0.s6, b0, c2);
+ c3 = fma((half8)a0.s7, b0, c3);
+#else // MULT_INTERLEAVE4X4_HEIGHT == 1
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+#endif // MULT_INTERLEAVE4X4_HEIGHT == 1
+ }
+
+ for (; i < (int)(COLS_MTX_B); ++i)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, half, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x8 block
+ vstore8(c0, 0, (__global half *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore8(c1, 0, (__global half *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore8(c2, 0, (__global half *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore8(c3, 0, (__global half *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
+
+// Undefine local defines
+#undef COLS_MTX_B
+
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+
+#endif // defined(COLS_B) && defined(MULT_TRANSPOSE1XW_WIDTH) && defined(MULT_INTERLEAVE4X4_HEIGHT)
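+
+#if 0 // Reference-only sketch; never compiled and not used by the kernels above.
+// It spells out, as a plain scalar loop, what the reshaped/interleaved kernels in this block
+// compute: C = ALPHA * (A * B) plus an optional BETA-scaled bias, with the optional activation
+// applied afterwards. The interleaving, transposition, vectorization and 3D reinterpretation are
+// omitted on purpose; alpha, beta and broadcast_bias are plain parameters standing in for the
+// -DALPHA, -DBETA and -DBROADCAST_BIAS build options.
+void reference_gemm(const float *a, const float *b, const float *bias, float *c, int m, int n,
+                    int k, float alpha, float beta, int broadcast_bias)
+{
+  for (int row = 0; row < m; ++row)
+  {
+    for (int col = 0; col < n; ++col)
+    {
+      float acc = 0.0f;
+      for (int i = 0; i < k; ++i)
+        acc += a[row * k + i] * b[i * n + col];
+      acc *= alpha;
+      if (bias)
+        acc += beta * (broadcast_bias ? bias[col] : bias[row * n + col]);
+      c[row * n + col] = acc;
+    }
+  }
+}
+#endif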
+
+#if defined(COLS_A) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && \
+  defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+#if defined(DATA_TYPE)
+#define VECTOR_TYPE VEC_DATA_TYPE(DATA_TYPE, NUM_ELEMS_PROCESSED_PER_THREAD_X)
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped.
+ *
+ * @note This OpenCL kernel works with floating point data types (F16/F32)
+ * @note The floating point data type must be passed at compile time using -DDATA_TYPE (e.g.
+ * -DDATA_TYPE=float)
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y
+ * @note The number of matrix A columns and the optional alpha's value need to be passed at compile
+ * time using -DCOLS_A and -DALPHA
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must also be
+ * passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16/F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported
+ * data types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in]  dst_step_x                         dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in]  dst_step_y                         dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the output tensor (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z, uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(DATA_TYPE);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated by dividing M
+ // (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ int end_row_vec_a = src_addr.s0 + (COLS_A * sizeof(DATA_TYPE));
+
+ VECTOR_TYPE acc0 = 0.0f;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VECTOR_TYPE acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VECTOR_TYPE acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VECTOR_TYPE acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ for (; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(DATA_TYPE));
+ src_addr += (int2)(2 * sizeof(DATA_TYPE), 2 * src1_stride_y))
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 2, DATA_TYPE, a, src0_ptr, src_addr.s0,
+ src0_stride_y, zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a0 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a1 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a2 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a3 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ VECTOR_TYPE b0 =
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global DATA_TYPE *)(src1_ptr + src_addr.s1));
+ VECTOR_TYPE b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(
+ 0, (__global DATA_TYPE *)(src1_ptr + src_addr.s1 + src1_stride_y));
+
+ // Accumulate
+ acc0 += b0 * (VECTOR_TYPE)a0.s0;
+ acc0 += b1 * (VECTOR_TYPE)a0.s1;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 += b0 * (VECTOR_TYPE)a1.s0;
+ acc1 += b1 * (VECTOR_TYPE)a1.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 += b0 * (VECTOR_TYPE)a2.s0;
+ acc2 += b1 * (VECTOR_TYPE)a2.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 += b0 * (VECTOR_TYPE)a3.s0;
+ acc3 += b1 * (VECTOR_TYPE)a3.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ for (; src_addr.s0 < end_row_vec_a; src_addr += (int2)(sizeof(DATA_TYPE), src1_stride_y))
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ DATA_TYPE a0 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ DATA_TYPE a1 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ DATA_TYPE a2 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ DATA_TYPE a3 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ DATA_TYPE a0 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ DATA_TYPE a1 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ DATA_TYPE a2 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ DATA_TYPE a3 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ VECTOR_TYPE b0 =
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global DATA_TYPE *)(src1_ptr + src_addr.s1));
+
+ // Accumulate
+ acc0 += b0 * (VECTOR_TYPE)a0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 += b0 * (VECTOR_TYPE)a1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 += b0 * (VECTOR_TYPE)a2;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 += b0 * (VECTOR_TYPE)a3;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M
+ // (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, DATA_TYPE, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_X * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, NUM_ELEMS_PROCESSED_PER_THREAD_X, DATA_TYPE, bias, src2_addr, 0, src2_stride_y,
+ zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_X * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, NUM_ELEMS_PROCESSED_PER_THREAD_X, DATA_TYPE, bias,
+ src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, DATA_TYPE, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, NUM_ELEMS_PROCESSED_PER_THREAD_X, DATA_TYPE, acc,
+ dst_addr, dst_stride_y, zout.s);
+}
+#endif // defined(DATA_TYPE)
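+
+#if 0 // Illustrative sketch only; never compiled and not used by the kernels in this file.
+// A scalar view of what one gemm_mm_floating_point work-item computes, assuming the hypothetical
+// build options NUM_ELEMS_PROCESSED_PER_THREAD_X=4 and NUM_ELEMS_PROCESSED_PER_THREAD_Y=4: each
+// work-item owns a 4x4 block of the output and walks the COLS_A dimension once, accumulating
+// acc[row][x] += A[row][i] * B[i][x] before alpha, beta and the activation are applied.
+void one_work_item_block(const float *a, const float *b, float acc[4][4], int cols_a, int ldb)
+{
+  for (int i = 0; i < cols_a; ++i)
+    for (int row = 0; row < 4; ++row)
+      for (int x = 0; x < 4; ++x)
+        acc[row][x] += a[row * cols_a + i] * b[i * ldb + x];
+}
+#endif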
+
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 32-bit floating point data type (float) and uses the fma
+ * units.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must also be
+ * passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported
+ * data types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in]  dst_step_x                         dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in]  dst_step_y                         dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f32_bifrost(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for matrix B
+ src_addr.s1 += idx * sizeof(float);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated by dividing M
+ // (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize accumulators
+ float4 acc0 = 0.0f;
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float4 acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float4 acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float4 acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // A and B src indices get incremented at the same time.
+ int i = 0;
+ for (; i <= ((int)COLS_A - 4); i += 4)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A and matrix B
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, float, a, src0_ptr, src_addr.s0, src0_stride_y,
+ zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A and matrix B
+ float4 a0 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float4 a1 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float4 a2 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float4 a3 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s0, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s0, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s0, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s0, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s0, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s0, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s0, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s0, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s0, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s0, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s0, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s0, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s0, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s0, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s0, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s0, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // Load values from matrix A and matrix B
+ b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s1, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s1, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s1, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s1, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s1, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s1, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s1, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s1, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s1, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s1, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s1, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s1, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s1, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s1, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s1, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s1, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // Load values from matrix A and matrix B
+ b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s2, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s2, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s2, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s2, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s2, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s2, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s2, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s2, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s2, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s2, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s2, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s2, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s2, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s2, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s2, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s2, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // Load values from matrix A and matrix B
+ b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s3, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s3, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s3, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s3, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s3, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s3, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s3, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s3, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s3, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s3, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s3, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s3, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s3, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s3, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s3, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s3, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += 4 * sizeof(float);
+ }
+
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0, b0.s3, acc0.s3);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1.s0 = fma(a1, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1, b0.s3, acc1.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2.s0 = fma(a2, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2, b0.s3, acc2.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3.s0 = fma(a3, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += sizeof(float);
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M
+ // (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float));
+
+ LOAD_BLOCK(1, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, float, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ vstore4(acc0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ vstore4(acc1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ vstore4(acc2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ vstore4(acc3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+}
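+
+#if 0 // Illustrative sketch only; never compiled and not used by the kernels in this file.
+// The inner loop of the Bifrost f32 kernel above is a chain of rank-1 updates: each scalar taken
+// from a column of A is combined with a 4-wide row of B through fma and added into one row of the
+// 4x4 accumulator block; the main loop unrolls this step 4 times along COLS_A to keep the FMA
+// pipes busy. The helper below shows a single such step for one accumulator row.
+float4 rank1_update_row(float a_scalar, float4 b_row, float4 acc_row)
+{
+  acc_row.s0 = fma(a_scalar, b_row.s0, acc_row.s0);
+  acc_row.s1 = fma(a_scalar, b_row.s1, acc_row.s1);
+  acc_row.s2 = fma(a_scalar, b_row.s2, acc_row.s2);
+  acc_row.s3 = fma(a_scalar, b_row.s3, acc_row.s3);
+  return acc_row;
+}
+#endif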
+
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 32-bit floating point data type (float) and uses the fma
+ * units. This OpenCL kernel is optimized for Bifrost when the number of matrix B columns is less
+ * than or equal to 1000.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=2.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha if
+ * alpha!=1.0f.
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must also be
+ * passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported
+ * data types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in]  dst_step_x                         dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in]  dst_step_y                         dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f32_bifrost_1000(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Requires NUM_ELEMS_PROCESSED_PER_THREAD_X == 2: C is kept in a vec2, A is loaded as vec4 and
+ // B with two vload2 reads. TODO: fix for NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(float);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated by dividing M
+ // (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize accumulators
+ float2 acc0 = 0.0f;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float2 acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float2 acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float2 acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // A and B src indices get incremented at the same time.
+ int i = 0;
+ for (; i <= ((int)COLS_A - 8); i += 8)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float8 a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + zin.s0));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float8 a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float2 b0 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b1 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b2 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b3 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b4 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b5 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b6 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b7 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s0, b0.s0, acc0.s0);
+ acc0.s0 = fma(a0.s1, b1.s0, acc0.s0);
+ acc0.s0 = fma(a0.s2, b2.s0, acc0.s0);
+ acc0.s0 = fma(a0.s3, b3.s0, acc0.s0);
+ acc0.s0 = fma(a0.s4, b4.s0, acc0.s0);
+ acc0.s0 = fma(a0.s5, b5.s0, acc0.s0);
+ acc0.s0 = fma(a0.s6, b6.s0, acc0.s0);
+ acc0.s0 = fma(a0.s7, b7.s0, acc0.s0);
+
+ acc0.s1 = fma(a0.s0, b0.s1, acc0.s1);
+ acc0.s1 = fma(a0.s1, b1.s1, acc0.s1);
+ acc0.s1 = fma(a0.s2, b2.s1, acc0.s1);
+ acc0.s1 = fma(a0.s3, b3.s1, acc0.s1);
+ acc0.s1 = fma(a0.s4, b4.s1, acc0.s1);
+ acc0.s1 = fma(a0.s5, b5.s1, acc0.s1);
+ acc0.s1 = fma(a0.s6, b6.s1, acc0.s1);
+ acc0.s1 = fma(a0.s7, b7.s1, acc0.s1);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+ acc1.s0 = fma(a0.s0, b0.s0, acc1.s0);
+ acc1.s0 = fma(a0.s1, b1.s0, acc1.s0);
+ acc1.s0 = fma(a0.s2, b2.s0, acc1.s0);
+ acc1.s0 = fma(a0.s3, b3.s0, acc1.s0);
+ acc1.s0 = fma(a0.s4, b4.s0, acc1.s0);
+ acc1.s0 = fma(a0.s5, b5.s0, acc1.s0);
+ acc1.s0 = fma(a0.s6, b6.s0, acc1.s0);
+ acc1.s0 = fma(a0.s7, b7.s0, acc1.s0);
+
+ acc1.s1 = fma(a0.s0, b0.s1, acc1.s1);
+ acc1.s1 = fma(a0.s1, b1.s1, acc1.s1);
+ acc1.s1 = fma(a0.s2, b2.s1, acc1.s1);
+ acc1.s1 = fma(a0.s3, b3.s1, acc1.s1);
+ acc1.s1 = fma(a0.s4, b4.s1, acc1.s1);
+ acc1.s1 = fma(a0.s5, b5.s1, acc1.s1);
+ acc1.s1 = fma(a0.s6, b6.s1, acc1.s1);
+ acc1.s1 = fma(a0.s7, b7.s1, acc1.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+ acc2.s0 = fma(a0.s0, b0.s0, acc2.s0);
+ acc2.s0 = fma(a0.s1, b1.s0, acc2.s0);
+ acc2.s0 = fma(a0.s2, b2.s0, acc2.s0);
+ acc2.s0 = fma(a0.s3, b3.s0, acc2.s0);
+ acc2.s0 = fma(a0.s4, b4.s0, acc2.s0);
+ acc2.s0 = fma(a0.s5, b5.s0, acc2.s0);
+ acc2.s0 = fma(a0.s6, b6.s0, acc2.s0);
+ acc2.s0 = fma(a0.s7, b7.s0, acc2.s0);
+
+ acc2.s1 = fma(a0.s0, b0.s1, acc2.s1);
+ acc2.s1 = fma(a0.s1, b1.s1, acc2.s1);
+ acc2.s1 = fma(a0.s2, b2.s1, acc2.s1);
+ acc2.s1 = fma(a0.s3, b3.s1, acc2.s1);
+ acc2.s1 = fma(a0.s4, b4.s1, acc2.s1);
+ acc2.s1 = fma(a0.s5, b5.s1, acc2.s1);
+ acc2.s1 = fma(a0.s6, b6.s1, acc2.s1);
+ acc2.s1 = fma(a0.s7, b7.s1, acc2.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+ acc3.s0 = fma(a0.s0, b0.s0, acc3.s0);
+ acc3.s0 = fma(a0.s1, b1.s0, acc3.s0);
+ acc3.s0 = fma(a0.s2, b2.s0, acc3.s0);
+ acc3.s0 = fma(a0.s3, b3.s0, acc3.s0);
+ acc3.s0 = fma(a0.s4, b4.s0, acc3.s0);
+ acc3.s0 = fma(a0.s5, b5.s0, acc3.s0);
+ acc3.s0 = fma(a0.s6, b6.s0, acc3.s0);
+ acc3.s0 = fma(a0.s7, b7.s0, acc3.s0);
+
+ acc3.s1 = fma(a0.s0, b0.s1, acc3.s1);
+ acc3.s1 = fma(a0.s1, b1.s1, acc3.s1);
+ acc3.s1 = fma(a0.s2, b2.s1, acc3.s1);
+ acc3.s1 = fma(a0.s3, b3.s1, acc3.s1);
+ acc3.s1 = fma(a0.s4, b4.s1, acc3.s1);
+ acc3.s1 = fma(a0.s5, b5.s1, acc3.s1);
+ acc3.s1 = fma(a0.s6, b6.s1, acc3.s1);
+ acc3.s1 = fma(a0.s7, b7.s1, acc3.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += sizeof(float) * 8;
+ }
+  // Left-over loop: process the remaining columns of matrix A one float at a time
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float2 b0 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0, b0.s1, acc0.s1);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1.s0 = fma(a1, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1, b0.s1, acc1.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2.s0 = fma(a2, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2, b0.s1, acc2.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3.s0 = fma(a3, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3, b0.s1, acc3.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += sizeof(float);
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)2 * sizeof(float));
+
+ LOAD_BLOCK(1, 2, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)2 * sizeof(float)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 2, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, float, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ vstore2(acc0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ vstore2(acc1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ vstore2(acc2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ vstore2(acc3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+}
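+
+// Illustrative note (not part of the original kernel): the compile-time contract used by this
+// family of kernels could be satisfied with a hypothetical host-side build-option string such as
+// the one below; the concrete values (256 columns of A, a 2x4 processing tile, alpha = 0.5) are
+// assumptions chosen only for the example.
+//
+//   const char build_opts[] = "-DCOLS_A=256 "
+//                             "-DNUM_ELEMS_PROCESSED_PER_THREAD_X=2 "
+//                             "-DNUM_ELEMS_PROCESSED_PER_THREAD_Y=4 "
+//                             "-DALPHA=0.5f";
+//   // e.g. passed as the options argument of clBuildProgram() when compiling this .cl file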
+
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+/** This OpenCL kernel computes the matrix-by-matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 16-bit floating point data type (half) and accumulates
+ * the result in a 32-bit floating point variable.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must be passed
+ * at compile time as well using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ *          (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns of matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f16_bifrost_acc32(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(half);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+  // Initialize accumulators (accumulation is performed in 32-bit float for accuracy)
+  float8 acc0 = 0.0f;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+  float8 acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+  float8 acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+  float8 acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ int i = 0;
+ for (; i <= ((int)COLS_A - 4); i += 4)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, half, a, src0_ptr, src_addr.s0, src0_stride_y,
+ zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half4 a0 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half4 a1 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half4 a2 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half4 a3 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float8 b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+
+ // Accumulate
+ acc0 = fma(b0, (float8)a0.s0, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s0, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s0, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s0, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (float8)a0.s1, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s1, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s1, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s1, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (float8)a0.s2, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s2, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s2, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s2, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (float8)a0.s3, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s3, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s3, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s3, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += 4 * sizeof(half);
+ }
+
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float8 b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+
+ src_addr += (int2)(sizeof(half), src1_stride_y);
+
+ // Accumulate
+ acc0 = fma(b0, (float8)a0, acc0); // b0 * (half8)a0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1, acc1); // b0 * (half8)a1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2, acc2); // b0 * (half8)a2;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3, acc3); // b0 * (half8)a3;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, acc, ALPHA);
+#endif // defined(ALPHA)
+
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias_f0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float8 bias_f1 = convert_float8(bias1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float8 bias_f2 = convert_float8(bias2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float8 bias_f3 = convert_float8(bias3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias_f);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+ half8 acc_h0 = convert_half8(acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half8 acc_h1 = convert_half8(acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half8 acc_h2 = convert_half8(acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half8 acc_h3 = convert_half8(acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, half, acc_h, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ STORE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, acc_h, dst_addr, dst_stride_y, zout.s);
+}
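+
+// Editorial note: unlike gemm_mm_floating_point_f16_bifrost below, this _acc32 variant keeps its
+// accumulators in float8 and only converts back to half8 (convert_half8 above) right before the
+// activation and the final STORE_BLOCK, trading extra conversions for a more accurate accumulation.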
+
+/** This OpenCL kernel computes the matrix-by-matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 16-bit floating point data type (half) and uses the fma
+ * units.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions must be passed
+ * at compile time as well using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ *          (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns of matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f16_bifrost(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(half);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ half8 acc0 = 0.0h;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half8 acc1 = 0.0h;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half8 acc2 = 0.0h;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half8 acc3 = 0.0h;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ int i = 0;
+ for (; i <= ((int)COLS_A - 4); i += 4)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, half, a, src0_ptr, src_addr.s0, src0_stride_y,
+ zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half4 a0 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half4 a1 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half4 a2 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half4 a3 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ half8 b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Accumulate
+ acc0 = fma(b0, (half8)a0.s0, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s0, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s0, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s0, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (half8)a0.s1, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s1, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s1, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s1, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (half8)a0.s2, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s2, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s2, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s2, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (half8)a0.s3, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s3, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s3, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s3, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += 4 * sizeof(half);
+ }
+
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ half8 b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+
+ src_addr += (int2)(sizeof(half), src1_stride_y);
+
+ // Accumulate
+ acc0 = fma(b0, (half8)a0, acc0); // b0 * (half8)a0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1, acc1); // b0 * (half8)a1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2, acc2); // b0 * (half8)a2;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3, acc3); // b0 * (half8)a3;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, half, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, half, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ STORE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, acc, dst_addr, dst_stride_y, zout.s);
+}
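+
+// Illustrative note (assumption, not upstream documentation): when -DBETA is defined, the kernels
+// above take the extra src2 bias arguments (see the #if defined(BETA) blocks in their signatures),
+// -DBROADCAST_BIAS selects the row-broadcast bias path, and defining UNIT_BETA skips the
+// SCALE_BLOCK of the bias. A hypothetical build-option fragment enabling a plain (non-broadcast)
+// bias addition could look like:
+//
+//   "-DBETA=1.0f -DUNIT_BETA"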
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+
+#endif // defined(COLS_A) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) &&
+       // defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+
+#if defined(BETA)
+/** This OpenCL kernel performs the in-place matrix addition between 2 matrices taking into account
+ * that the second matrix might be weighted by a scalar value beta.
+ *
+ * @note The value of beta needs to be passed at compile time using -DBETA
+ *
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types:
+ * F32
+ * @param[in] src_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in]  src_stride_z                      Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_ma_f32(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ // Load values from A x B
+ float4 alpha_ab = vload4(0, (__global float *)dst.ptr);
+
+ // Load values from Matrix C
+ float4 c = vload4(0, (__global float *)src.ptr);
+
+ // Computes alpha * axb + beta * c
+ float4 out = alpha_ab + (float4)BETA * c;
+
+ // Store final result in axb matrix
+ vstore4(out, 0, (__global float *)dst.ptr);
+}
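+
+// Worked example (illustrative values only): with -DBETA=0.5f, an A*B element of 2.0f already in
+// dst and a matrix C element of 4.0f in src, the kernel stores 2.0f + 0.5f * 4.0f = 4.0f back
+// into dst.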
+
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+/** This OpenCL kernel performs the in-place matrix addition between 2 matrices taking into account
+ * that the second matrix might be weighted by a scalar value beta.
+ *
+ * @note The value of beta needs to be passed at compile time using -DBETA
+ *
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types:
+ * F16
+ * @param[in] src_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in]  src_stride_z                      Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_ma_f16(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ // Load values from A x B
+ half8 alpha_ab = vload8(0, (__global half *)dst.ptr);
+
+ // Load values from Matrix C
+ half8 c = vload8(0, (__global half *)src.ptr);
+
+ // Computes alpha * axb + beta * c
+ half8 out = alpha_ab + (half8)BETA * c;
+
+ // Store final result in axb matrix
+ vstore8(out, 0, (__global half *)dst.ptr);
+}
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+#endif // defined(BETA)
+
+#if defined(WIDTH_VECTOR_A)
+/** This OpenCL kernel computes the vector-by-matrix multiplication between each row of A (src0) and
+ * matrix B (src1), used for the locally connected layer
+ *
+ * @note The width of A needs to be passed at compile time using -DWIDTH_VECTOR_A
+ *
+ * @note The input A and matrix B must not be reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_lc_vm_f32(IMAGE_DECLARATION(src0), TENSOR3D_DECLARATION(src1),
+ IMAGE_DECLARATION(dst))
+{
+ int idx = get_global_id(0) * 4;
+ int idy = get_global_id(1);
+
+ // Compute the address for the vector A and matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes + src0_stride_y * idy,
+ src1_offset_first_element_in_bytes + src1_stride_z * idy));
+ src_addr.s1 += idx * sizeof(float);
+
+ int end_row_vec_a = src_addr.s0 + (WIDTH_VECTOR_A * sizeof(float));
+
+ float4 acc = 0.0f;
+
+ for (; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(float));
+ src_addr += (int2)(2 * sizeof(float), 2 * src1_stride_y))
+ {
+ float2 a0 = vload2(0, (__global float *)(src0_ptr + src_addr.s0));
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ float4 b1 = vload4(0, (__global float *)(src1_ptr + src_addr.s1 + src1_stride_y));
+
+ acc += b0 * (float4)a0.s0;
+ acc += b1 * (float4)a0.s1;
+ }
+
+ for (; src_addr.s0 < end_row_vec_a; src_addr += (int2)(sizeof(float), src1_stride_y))
+ {
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0));
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+
+ acc += b0 * (float4)a0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ vstore4(acc, 0, (__global float *)(offset(&dst, 0, 0)));
+}
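+
+// Illustrative trace (hypothetical -DWIDTH_VECTOR_A=5): the first loop consumes the columns of the
+// A row in pairs, (0,1) and (2,3), reading a float4 of B per column, and the left-over loop then
+// handles the remaining column 4 with scalar loads of A.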
+#endif // defined(WIDTH_VECTOR_A)
+
+/** This kernel accumulates each row with the biases vector.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=short.
+ * @note The vector size must be passed at compile time using -DVECTOR_SIZE e.g. -DVECTOR_SIZE=16.
+ *
+ * @param[in, out] accum_ptr Pointer to the accumulate tensor. Supported
+ * data type: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in]      accum_stride_x                     Stride of the accumulate tensor in X
+ * dimension (in bytes)
+ * @param[in] accum_step_x accum_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in]      accum_stride_y                     Stride of the accumulate tensor in Y
+ * dimension (in bytes)
+ * @param[in]      accum_step_y                       accum_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] accum_offset_first_element_in_bytes The offset of the first element in the
+ * accumulate tensor
+ * @param[in] biases_ptr Pointer to the biases vector. Same as @p
+ * accum_ptr
+ * @param[in]      biases_stride_x                    Stride of the biases vector in X
+ * dimension (in bytes)
+ * @param[in]      biases_step_x                      biases_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the
+ * biases vector
+ */
+#if defined(DATA_TYPE) && defined(VECTOR_SIZE)
+__kernel void gemm_accumulate_biases(IMAGE_DECLARATION(accum), VECTOR_DECLARATION(biases))
+{
+ Image accum = CONVERT_TO_IMAGE_STRUCT(accum);
+ Vector biases = CONVERT_TO_VECTOR_STRUCT(biases);
+
+  // Load VECTOR_SIZE elements from the accumulate tensor and the biases vector
+ VEC_DATA_TYPE(DATA_TYPE, VECTOR_SIZE)
+ accum_value = VLOAD(VECTOR_SIZE)(0, (__global DATA_TYPE *)accum.ptr);
+ VEC_DATA_TYPE(DATA_TYPE, VECTOR_SIZE)
+ biases_value = VLOAD(VECTOR_SIZE)(0, (__global DATA_TYPE *)biases.ptr);
+ accum_value = biases_value + accum_value;
+ // Store result in the accumulate buffer
+ VSTORE(VECTOR_SIZE)
+ (accum_value, 0, (__global DATA_TYPE *)accum.ptr);
+}
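+
+// Illustrative note: a hypothetical build-option string satisfying the guard above, e.g. for an
+// F32 accumulate buffer processed 16 elements at a time:
+//
+//   "-DDATA_TYPE=float -DVECTOR_SIZE=16"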
+#endif // defined(DATA_TYPE) && defined(VECTOR_SIZE)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "activation_float_helpers.h"
+#include "helpers.h"
+
+/** Loads the rows from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).
+ * @name LOAD_ROW_n
+ *
+ * @param[in] N0 The number of rows to load
+ * @param[in] DATA_TYPE The data type of variables
+ * @param[in] BASENAME The basename of the destination variables for the loaded rows
+ * @param[in] PTR The base pointer
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The z-axis offset vector
+ * @{
+ */
+#define LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##0 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 0 * STRIDE_Y + Z##0));
+
+#define LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##1 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 1 * STRIDE_Y + Z##1));
+
+#define LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##2 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 2 * STRIDE_Y + Z##2));
+
+#define LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##3 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 3 * STRIDE_Y + Z##3));
+
+#define LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##4 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 4 * STRIDE_Y + Z##4));
+
+#define LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##5 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 5 * STRIDE_Y + Z##5));
+
+#define LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##6 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 6 * STRIDE_Y + Z##6));
+
+#define LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##7 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 7 * STRIDE_Y + Z##7));
+
+#define LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##8 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 8 * STRIDE_Y + Z##8));
+
+#define LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##9 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 9 * STRIDE_Y + Z##9));
+
+#define LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##A = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 10 * STRIDE_Y + Z##A));
+
+#define LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##B = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 11 * STRIDE_Y + Z##B));
+
+#define LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##C = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 12 * STRIDE_Y + Z##C));
+
+#define LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##D = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 13 * STRIDE_Y + Z##D));
+
+#define LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##E = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 14 * STRIDE_Y + Z##E));
+
+#define LOAD_ROW_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##F = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 15 * STRIDE_Y + Z##F));
+
+/** @}*/ // end of group LOAD_ROW_n
+
+/** Load Blocks (consecutive rows and columns) with Z offset.
+ * @name LOAD_BLOCK
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16
+ * The data to load is expected to have consecutive names for each row.
+ * E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.
+ * The Z offset is expected to have consecutive names.
+ * E.g., for M0=3, and Z=zin, the expected Z offsets are zin0, zin1 and zin2.
+ *
+ * @param[in] M0 The number of consecutive rows
+ * @param[in] N0 The number of consecutive columns
+ * @param[in] DATA_TYPE The data type of the target
+ * @param[in] BASENAME The basename of the result variables
+ * @param[in] PTR The base pointer for the data
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride in y-axis direction
+ * @param[in] Z The z-axis offset vector
+ * @{
+ */
+#define LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
+#define LOAD_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
+/** @} */ // end of group LOAD_BLOCK
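+
+// Illustrative expansion (not part of the original helpers, shown only as a hedged sketch):
+// assuming zin0 and zin1 have been computed beforehand,
+//   LOAD_BLOCK(2, 4, float, a, src_ptr, 0, src_stride_y, zin)
+// expands roughly to
+//   float4 a0 = vload4(0, (__global float *)(src_ptr + 0 + 0 * src_stride_y + zin0));
+//   float4 a1 = vload4(0, (__global float *)(src_ptr + 0 + 1 * src_stride_y + zin1));
+// where src_ptr, src_stride_y and zin are hypothetical caller-side names.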
+
+/** Loads the elements from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).
+ * @name LOAD_ELEMENT_n
+ *
+ * @param[in] N0 The width of the destination vectors
+ * @param[in] DATA_TYPE The data type of variables
+ * @param[in] BASENAME The basename of the destination variables for the loaded rows
+ * @param[in] PTR The base pointer
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @{
+ */
+#define LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##0 = *((__global DATA_TYPE *)(PTR + OFFSET + 0 * STRIDE_Y));
+
+#define LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##1 = *((__global DATA_TYPE *)(PTR + OFFSET + 1 * STRIDE_Y));
+
+#define LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##2 = *((__global DATA_TYPE *)(PTR + OFFSET + 2 * STRIDE_Y));
+
+#define LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##3 = *((__global DATA_TYPE *)(PTR + OFFSET + 3 * STRIDE_Y));
+
+#define LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##4 = *((__global DATA_TYPE *)(PTR + OFFSET + 4 * STRIDE_Y));
+
+#define LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##5 = *((__global DATA_TYPE *)(PTR + OFFSET + 5 * STRIDE_Y));
+
+#define LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##6 = *((__global DATA_TYPE *)(PTR + OFFSET + 6 * STRIDE_Y));
+
+#define LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##7 = *((__global DATA_TYPE *)(PTR + OFFSET + 7 * STRIDE_Y));
+
+#define LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##8 = *((__global DATA_TYPE *)(PTR + OFFSET + 8 * STRIDE_Y));
+
+#define LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##9 = *((__global DATA_TYPE *)(PTR + OFFSET + 9 * STRIDE_Y));
+
+#define LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##A = *((__global DATA_TYPE *)(PTR + OFFSET + 10 * STRIDE_Y));
+
+#define LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##B = *((__global DATA_TYPE *)(PTR + OFFSET + 11 * STRIDE_Y));
+
+#define LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##C = *((__global DATA_TYPE *)(PTR + OFFSET + 12 * STRIDE_Y));
+
+#define LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##D = *((__global DATA_TYPE *)(PTR + OFFSET + 13 * STRIDE_Y));
+
+#define LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##E = *((__global DATA_TYPE *)(PTR + OFFSET + 14 * STRIDE_Y));
+
+#define LOAD_ELEMENT_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##F = *((__global DATA_TYPE *)(PTR + OFFSET + 15 * STRIDE_Y));
+
+/** @}*/ // end of group LOAD_ELEMENT_n
+
+/** Load Scalar as Vector (consecutive elements).
+ * @name LOAD_SCALAR_AS_VECTOR
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16
+ * The data to load is expected to have consecutive names for each row.
+ * E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.
+ *
+ * @param[in] M0 The number of consecutive rows
+ * @param[in] N0 The number of consecutive columns
+ * @param[in] DATA_TYPE The data type of the target
+ * @param[in] BASENAME The basename of the result variables
+ * @param[in] PTR The base pointer for the data
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride in y-axis direction
+ * @{
+ */
+#define LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
+#define LOAD_SCALAR_AS_VECTOR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
+/** @} */ // end of group LOAD_SCALAR_AS_VECTOR
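+
+// Illustrative sketch (not part of the original helpers):
+//   LOAD_SCALAR_AS_VECTOR(2, 4, float, a, src_ptr, 0, src_stride_y)
+// expands roughly to
+//   float4 a0 = *((__global float *)(src_ptr + 0 + 0 * src_stride_y));
+//   float4 a1 = *((__global float *)(src_ptr + 0 + 1 * src_stride_y));
+// i.e. each row contributes one scalar, which OpenCL widens to the declared vector type.
+// src_ptr and src_stride_y are hypothetical caller-side names.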
+
+/** Basic macros to calculate Z offset values from Z0 to Zn-1
+ * @name CALCULATE_Z_OFFSET_n
+ *
+ * @param[in] M0 The number of offset values to calculate
+ * @param[in] DATA_TYPE The data type of the results
+ * @param[in] Z The basename of the result variables
+ * @param[in] Y The work-item ID of y-axis
+ * @param[in] HEIGHT_GEMM3D The height of GEMM3D
+ * @param[in] DEPTH_GEMM3D The depth of GEMM3D
+ * @param[in] CROSS_PLANE_PAD The padding required for plane changes across the z-dimension
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ *
+ * @{
+ */
+#define CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##0 = (0 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##0 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##0); \
+ Z##0 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##1 = (1 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##1 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##1); \
+ Z##1 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##2 = (2 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##2 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##2); \
+ Z##2 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##3 = (3 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##3 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##3); \
+ Z##3 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##4 = (4 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##4 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##4); \
+ Z##4 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##5 = (5 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##5 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##5); \
+ Z##5 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##6 = (6 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##6 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##6); \
+ Z##6 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_8(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##7 = (7 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##7 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##7); \
+ Z##7 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+/** @} */ // end of group CALCULATE_Z_OFFSET_n
+
+/** Calculate Z offset values from Z0 to Zn-1
+ * @name CALCULATE_Z_OFFSET
+ *
+ * The Z offsets are expected to have consecutive names.
+ * E.g., for M0=3 and Z=zin, the expected names of Z offsets are zin0, zin1 and zin2.
+ * Note that CROSS_PLANE_PAD (cross plane padding) is required to take into account
+ * the possible cross plane paddings in case the plane changes across the z-dimension.
+ *
+ * <!--
+ * | |
+ * | plane0 |
+ * | |
+ * |__________________|
+ * |******************|
+ * | cross_plane_pad |
+ * |******************|
+ * | |
+ * | plane1 |
+ * | |
+ * |__________________|
+ * -->
+ *
+ * @param[in] M0 The number of offset values to calculate
+ * @param[in] DATA_TYPE The data type of the results
+ * @param[in] Z The basename of the result variables
+ * @param[in] Y The work-item ID of y-axis
+ * @param[in] HEIGHT_GEMM3D The height of GEMM3D
+ * @param[in] DEPTH_GEMM3D The depth of GEMM3D
+ * @param[in] CROSS_PLANE_PAD The padding required for plane changes across the z-dimension
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @{
+ */
+#define CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_##M0(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y)
+#define CALCULATE_Z_OFFSET(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y)
+/** @} */ // end of group CALCULATE_Z_OFFSET
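+
+// Illustrative sketch (not part of the original helpers): assuming zin0 and zin1 were declared
+// earlier by the caller, CALCULATE_Z_OFFSET(2, uint, zin, y, H, D, pad, stride_y) expands
+// roughly to
+//   zin0 = (0 + (uint)(y * (uint)2)) / (uint)H;
+//   zin0 = min((uint)(D - 1), zin0);
+//   zin0 *= (pad * stride_y);
+// plus the analogous three statements for zin1; y, H, D, pad and stride_y are placeholders.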
+
+/** Store the 0th to (n-1)th rows of the given variables
+ * @name STORE_ROW_n
+ *
+ * @param[in] N0 The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##0, 0, (__global DATA_TYPE *)(PTR + 0 * STRIDE_Y + Z##0));
+
+#define STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##1, 0, (__global DATA_TYPE *)(PTR + 1 * STRIDE_Y + Z##1));
+
+#define STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##2, 0, (__global DATA_TYPE *)(PTR + 2 * STRIDE_Y + Z##2));
+
+#define STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##3, 0, (__global DATA_TYPE *)(PTR + 3 * STRIDE_Y + Z##3));
+
+#define STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##4, 0, (__global DATA_TYPE *)(PTR + 4 * STRIDE_Y + Z##4));
+
+#define STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##5, 0, (__global DATA_TYPE *)(PTR + 5 * STRIDE_Y + Z##5));
+
+#define STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##6, 0, (__global DATA_TYPE *)(PTR + 6 * STRIDE_Y + Z##6));
+
+#define STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##7, 0, (__global DATA_TYPE *)(PTR + 7 * STRIDE_Y + Z##7));
+
+#define STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##8, 0, (__global DATA_TYPE *)(PTR + 8 * STRIDE_Y + Z##8));
+
+#define STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##9, 0, (__global DATA_TYPE *)(PTR + 9 * STRIDE_Y + Z##9));
+
+#define STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##A, 0, (__global DATA_TYPE *)(PTR + 10 * STRIDE_Y + Z##A));
+
+#define STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##B, 0, (__global DATA_TYPE *)(PTR + 11 * STRIDE_Y + Z##B));
+
+#define STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##C, 0, (__global DATA_TYPE *)(PTR + 12 * STRIDE_Y + Z##C));
+
+#define STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##D, 0, (__global DATA_TYPE *)(PTR + 13 * STRIDE_Y + Z##D));
+
+#define STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##E, 0, (__global DATA_TYPE *)(PTR + 14 * STRIDE_Y + Z##E));
+
+#define STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##F, 0, (__global DATA_TYPE *)(PTR + 15 * STRIDE_Y + Z##F));
+/** @} */ // end of group STORE_ROW_n
+
+/** Convert and store the 0th to (n-1)th rows of the given variables
+ * @name CONVERT_STORE_ROW_n
+ *
+ * @param[in] N0 The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##0), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 0 * STRIDE_Y + Z##0));
+
+#define CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##1), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 1 * STRIDE_Y + Z##1));
+
+#define CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##2), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 2 * STRIDE_Y + Z##2));
+
+#define CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##3), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 3 * STRIDE_Y + Z##3));
+
+#define CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##4), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 4 * STRIDE_Y + Z##4));
+
+#define CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##5), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 5 * STRIDE_Y + Z##5));
+
+#define CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##6), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 6 * STRIDE_Y + Z##6));
+
+#define CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##7), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 7 * STRIDE_Y + Z##7));
+
+#define CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##8), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 8 * STRIDE_Y + Z##8));
+
+#define CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)                  \
+ CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##9), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 9 * STRIDE_Y + Z##9));
+
+#define CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##A), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 10 * STRIDE_Y + Z##A));
+
+#define CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##B), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 11 * STRIDE_Y + Z##B));
+
+#define CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##C), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 12 * STRIDE_Y + Z##C));
+
+#define CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##D), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 13 * STRIDE_Y + Z##D));
+
+#define CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##E), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 14 * STRIDE_Y + Z##E));
+
+#define CONVERT_STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##F), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 15 * STRIDE_Y + Z##F));
+
+/** @} */ // end of group CONVERT_STORE_ROW_n
+
+/** Store a block of the given size M0xN0
+ * @name STORE_BLOCK
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=2,3,4,8,16.
+ * The data to store is expected to have consecutive names for each row.
+ * E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2.
+ * The Z offset is expected to have consecutive names.
+ * E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.
+ *
+ * @param[in] M0 The number of rows to store
+ * @param[in] N0 The size of each vector
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+#define STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+/** @} */ // end of group STORE_BLOCK
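+
+// Illustrative sketch (not part of the original helpers): with hypothetical names dst_ptr,
+// dst_stride_y and zout, STORE_BLOCK(2, 4, float, c, dst_ptr, dst_stride_y, zout) expands
+// roughly to
+//   vstore4(c0, 0, (__global float *)(dst_ptr + 0 * dst_stride_y + zout0));
+//   vstore4(c1, 0, (__global float *)(dst_ptr + 1 * dst_stride_y + zout1));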
+
+/** Convert and store a block of the given size M0xN0
+ * @name CONVERT_STORE_BLOCK
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=2,3,4,8,16.
+ * The data to store is expected to have consecutive names for each row.
+ * E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2.
+ * The Z offset is expected to have consecutive names.
+ * E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.
+ *
+ * @param[in] M0 The number of rows to store
+ * @param[in] N0 The size of each vector
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+#define CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+/** @} */ // end of group CONVERT_STORE_BLOCK
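+
+// Illustrative sketch (not part of the original helpers): CONVERT_STORE_BLOCK behaves like
+// STORE_BLOCK above but saturate-converts each row first, e.g.
+//   CONVERT_STORE_BLOCK(2, 4, uchar, c, dst_ptr, dst_stride_y, zout)
+// expands roughly to
+//   vstore4(convert_uchar4_sat(c0), 0, (__global uchar *)(dst_ptr + 0 * dst_stride_y + zout0));
+//   vstore4(convert_uchar4_sat(c1), 0, (__global uchar *)(dst_ptr + 1 * dst_stride_y + zout1));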
+
+/** Scale the rows in the given variables (BASENAME0 to BASENAMEn-1)
+ * @name SCALE_ROW_n
+ *
+ * @param[in] DATA_TYPE The data type of the variables
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] SCALE The scale factor
+ * @{
+ */
+#define SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) BASENAME##0 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##1 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##2 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##3 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##4 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##5 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##6 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##7 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##8 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##9 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##A *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##B *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##C *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##D *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##E *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_16(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##F *= (DATA_TYPE)SCALE;
+/** @} */ // end of group SCALE_ROW_n
+
+/** Scale elements stored in a block (BASENAME)
+ * @name SCALE_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16
+ *
+ * @param[in] N The number of rows in the block
+ * @param[in] DATA_TYPE The data type of the block
+ * @param[in] BASENAME The basename of the block
+ * @param[in] SCALE The scale factor
+ * @{
+ */
+#define SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE) SCALE_ROW_##N(DATA_TYPE, BASENAME, SCALE)
+#define SCALE_BLOCK(N, DATA_TYPE, BASENAME, SCALE) SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE)
+/** @} */ // end of group SCALE_BLOCK
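+
+// Illustrative sketch (not part of the original helpers): SCALE_BLOCK(3, float, c, ALPHA)
+// expands to
+//   c0 *= (float)ALPHA;
+//   c1 *= (float)ALPHA;
+//   c2 *= (float)ALPHA;
+// where ALPHA stands for whatever scale factor the caller passes.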
+
+/** Create a new vector containing the values at the given index for a set of given vectors
+ * @name COLUMN_VECTORn
+ *
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] X The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ * @{
+ */
+#define COLUMN_VECTOR1(IDX_COL, BASENAME, X, TYPE) \
+ TYPE BASENAME##IDX_COL = (TYPE)((X##0).s##IDX_COL);
+#define COLUMN_VECTOR2(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 2) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0).s##IDX_COL, (X##1).s##IDX_COL);
+#define COLUMN_VECTOR3(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 3) \
+ BASENAME##IDX_COL = \
+ (VEC_DATA_TYPE(TYPE, 3))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL);
+#define COLUMN_VECTOR4(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 4) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0).s##IDX_COL, (X##1).s##IDX_COL, \
+ (X##2).s##IDX_COL, (X##3).s##IDX_COL);
+#define COLUMN_VECTOR8(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 8) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))( \
+ (X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, \
+ (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL);
+#define COLUMN_VECTOR16(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 16) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))( \
+ (X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, \
+ (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL, (X##8).s##IDX_COL, (X##9).s##IDX_COL, \
+ (X##A).s##IDX_COL, (X##B).s##IDX_COL, (X##C).s##IDX_COL, (X##D).s##IDX_COL, (X##E).s##IDX_COL, \
+ (X##F).s##IDX_COL);
+/** @} */ // end of group COLUMN_VECTORn
+
+/** Create a new vector containing the values at the given index. Utility macros for transposing a
+ * column-vector
+ * @name COLUMN_VECTOR_SCALARn
+ *
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] X The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ * @{
+ */
+#define COLUMN_VECTOR_SCALAR1(IDX_COL, BASENAME, X, TYPE) TYPE BASENAME##IDX_COL = (TYPE)((X##0));
+#define COLUMN_VECTOR_SCALAR2(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 2) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0), (X##1));
+#define COLUMN_VECTOR_SCALAR3(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 3) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 3))((X##0), (X##1), (X##2));
+#define COLUMN_VECTOR_SCALAR4(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 4) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0), (X##1), (X##2), (X##3));
+#define COLUMN_VECTOR_SCALAR8(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 8) \
+ BASENAME##IDX_COL = \
+ (VEC_DATA_TYPE(TYPE, 8))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7));
+#define COLUMN_VECTOR_SCALAR16(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 16) \
+ BASENAME##IDX_COL = \
+ (VEC_DATA_TYPE(TYPE, 16))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7), \
+ (X##8), (X##9), (X##A), (X##B), (X##C), (X##D), (X##E), (X##F));
+/** @} */ // end of group COLUMN_VECTORn
+
+/** Create transposed vectors of the given vectors
+ * @name TRANSPOSE_K0Xn
+ *
+ * @param[in] K0 The size of the source vectors
+ * @param[in] BASENAME The basename of transposed vectors
+ * @param[in] B The basename of source vectors for transposition
+ * @param[in] TYPE The data type of the transposed vectors
+ * @{
+ */
+#define TRANSPOSE_K0X1(K0, BASENAME, B, TYPE) COLUMN_VECTOR_SCALAR(K0, 0, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X2(K0, BASENAME, B, TYPE) \
+ COLUMN_VECTOR(K0, 0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 1, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X3(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X2(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 2, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X4(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X3(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 3, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X8(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X4(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 4, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 5, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 6, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 7, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X16(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X8(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 8, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 9, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, A, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, B, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, C, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, D, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, E, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, F, BASENAME, B, TYPE);
+
+/** @} */ // end of group TRANSPOSE_K0Xn
+
+/** Create column vectors to contain the values at the given index for a set of given vectors
+ *
+ * @param[in] K0 The number of source vectors
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] B The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ */
+#define COLUMN_VECTOR(K0, IDX_COL, BASENAME, B, TYPE) \
+ CONCAT(COLUMN_VECTOR, K0) \
+ (IDX_COL, BASENAME, B, TYPE);
+
+/** Create column vectors to contain the values at the given index. Utility macro for transposing a
+ * column-vector
+ *
+ * @param[in] K0 The number of source vectors
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] B The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ */
+#define COLUMN_VECTOR_SCALAR(K0, IDX_COL, BASENAME, B, TYPE) \
+ CONCAT(COLUMN_VECTOR_SCALAR, K0) \
+ (IDX_COL, BASENAME, B, TYPE);
+
+/** Create transposed vectors from the given source vectors
+ *
+ * @param[in] K0 The size of source vectors
+ * @param[in] N0 The number of source vectors
+ * @param[in] BASENAME The basename of transposed vectors
+ * @param[in] B The basename of source vectors for transposition
+ * @param[in] TYPE The data type of the transposed vectors
+ *
+ */
+#define TRANSPOSE_K0XN0(K0, N0, BASENAME, B, TYPE) \
+ CONCAT(TRANSPOSE_K0X, N0) \
+ (K0, BASENAME, B, TYPE);
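+
+// Illustrative sketch (not part of the original helpers): given two uchar4 vectors b0 and b1,
+// TRANSPOSE_K0XN0(2, 4, b_t, b, uchar) expands roughly to
+//   uchar2 b_t0 = (uchar2)((b0).s0, (b1).s0);
+//   uchar2 b_t1 = (uchar2)((b0).s1, (b1).s1);
+//   uchar2 b_t2 = (uchar2)((b0).s2, (b1).s2);
+//   uchar2 b_t3 = (uchar2)((b0).s3, (b1).s3);
+// i.e. the components of b0 and b1 are regrouped column-wise; b_t is a hypothetical basename.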
+
+/** Add the variables (BIAS0 to BIASn-1) to the others (BASENAME0 to BASENAMEn-1)
+ * @name ADD_ROW_n
+ *
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The basename of the added variables
+ * @{
+ */
+#define ADD_ROW_1(BASENAME, BIAS) BASENAME##0 += BIAS##0;
+
+#define ADD_ROW_2(BASENAME, BIAS) \
+ ADD_ROW_1(BASENAME, BIAS) \
+ BASENAME##1 += BIAS##1;
+
+#define ADD_ROW_3(BASENAME, BIAS) \
+ ADD_ROW_2(BASENAME, BIAS) \
+ BASENAME##2 += BIAS##2;
+
+#define ADD_ROW_4(BASENAME, BIAS) \
+ ADD_ROW_3(BASENAME, BIAS) \
+ BASENAME##3 += BIAS##3;
+
+#define ADD_ROW_5(BASENAME, BIAS) \
+ ADD_ROW_4(BASENAME, BIAS) \
+ BASENAME##4 += BIAS##4;
+
+#define ADD_ROW_6(BASENAME, BIAS) \
+ ADD_ROW_5(BASENAME, BIAS) \
+ BASENAME##5 += BIAS##5;
+
+#define ADD_ROW_7(BASENAME, BIAS) \
+ ADD_ROW_6(BASENAME, BIAS) \
+ BASENAME##6 += BIAS##6;
+
+#define ADD_ROW_8(BASENAME, BIAS) \
+ ADD_ROW_7(BASENAME, BIAS) \
+ BASENAME##7 += BIAS##7;
+
+#define ADD_ROW_9(BASENAME, BIAS) \
+ ADD_ROW_8(BASENAME, BIAS) \
+ BASENAME##8 += BIAS##8;
+
+#define ADD_ROW_10(BASENAME, BIAS) \
+ ADD_ROW_9(BASENAME, BIAS) \
+ BASENAME##9 += BIAS##9;
+
+#define ADD_ROW_11(BASENAME, BIAS) \
+ ADD_ROW_10(BASENAME, BIAS) \
+ BASENAME##A += BIAS##A;
+
+#define ADD_ROW_12(BASENAME, BIAS) \
+ ADD_ROW_11(BASENAME, BIAS) \
+ BASENAME##B += BIAS##B;
+
+#define ADD_ROW_13(BASENAME, BIAS) \
+ ADD_ROW_12(BASENAME, BIAS) \
+ BASENAME##C += BIAS##C;
+
+#define ADD_ROW_14(BASENAME, BIAS) \
+ ADD_ROW_13(BASENAME, BIAS) \
+ BASENAME##D += BIAS##D;
+
+#define ADD_ROW_15(BASENAME, BIAS) \
+ ADD_ROW_14(BASENAME, BIAS) \
+ BASENAME##E += BIAS##E;
+
+#define ADD_ROW_16(BASENAME, BIAS) \
+ ADD_ROW_15(BASENAME, BIAS) \
+ BASENAME##F += BIAS##F;
+
+/** @} */ // end of group ADD_ROW_n
+
+/** Add the block (BIAS) to another block (BASENAME)
+ * @name ADD_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16
+ *
+ * @param[in] N The number of vectors in the block
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The basename of the added variables
+ * @{
+ */
+#define ADD_BLOCK_STR(N, BASENAME, BIAS) ADD_ROW_##N(BASENAME, BIAS)
+#define ADD_BLOCK(N, BASENAME, BIAS) ADD_BLOCK_STR(N, BASENAME, BIAS)
+/** @} */ // end of group ADD_BLOCK
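+
+// Illustrative sketch (not part of the original helpers): ADD_BLOCK(2, c, bias) expands to
+//   c0 += bias0;
+//   c1 += bias1;
+// where c0/c1 and bias0/bias1 are vectors of matching width declared by the caller.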
+
+/** Broadcast (add a single value) to each element of the destination variables
+ * @name ADD_ROW_BROADCAST_n
+ *
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The variable containing the value to add
+ * @{
+ */
+#define ADD_ROW_BROADCAST_1(BASENAME, BIAS) BASENAME##0 += BIAS;
+
+#define ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_1(BASENAME, BIAS) \
+ BASENAME##1 += BIAS;
+
+#define ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
+ BASENAME##2 += BIAS;
+
+#define ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
+ BASENAME##3 += BIAS;
+
+#define ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
+ BASENAME##4 += BIAS;
+
+#define ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
+ BASENAME##5 += BIAS;
+
+#define ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
+ BASENAME##6 += BIAS;
+
+#define ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
+ BASENAME##7 += BIAS;
+
+#define ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
+ BASENAME##8 += BIAS;
+
+#define ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
+ BASENAME##9 += BIAS;
+
+#define ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
+ BASENAME##A += BIAS;
+
+#define ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
+ BASENAME##B += BIAS;
+
+#define ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
+ BASENAME##C += BIAS;
+
+#define ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
+ BASENAME##D += BIAS;
+
+#define ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
+ BASENAME##E += BIAS;
+
+#define ADD_ROW_BROADCAST_16(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
+ BASENAME##F += BIAS;
+/** @} */ // end of group ADD_ROW_BROADCAST_n
+
+/** Broadcast (add a value) to each element of the destination block (BASENAME)
+ * @name ADD_BLOCK_BROADCAST
+ *
+ * Supported cases are N=1,2,3,...,16.
+ *
+ * @param[in] N The number of vectors in the block
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The variable containing the value to add
+ * @{
+ */
+#define ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS) ADD_ROW_BROADCAST_##N(BASENAME, BIAS)
+#define ADD_BLOCK_BROADCAST(N, BASENAME, BIAS) ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS)
+/** @} */ // end of group ADD_BLOCK_BROADCAST
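+
+// Illustrative sketch (not part of the original helpers): ADD_BLOCK_BROADCAST(2, c, bias_s)
+// expands to
+//   c0 += bias_s;
+//   c1 += bias_s;
+// i.e. the same value bias_s (a hypothetical name) is added to every row of the block.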
+
+/** Apply activation to the given variables
+ * @name ACTIVATION_ROW_n
+ *
+ * @param[in] ACTIVATION_TYPE The type of the activation
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] A_VAL Additional value required by the activation
+ * @param[in] B_VAL Additional value required by the activation
+ * @{
+ */
+#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##0, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##1 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##1, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##2 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##2, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##3 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##3, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##4 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##4, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##5 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##5, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##6 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##6, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##7 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##7, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##8 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##8, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##9 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##9, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##A = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##A, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##B = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##B, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##C = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##C, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##D = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##D, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##E = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##E, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_16(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##F = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##F, A_VAL, B_VAL);
+/** @} */ // end of group ACTIVATION_ROW_n
+
+/** Apply activation to a block (BASENAME)
+ * @name ACTIVATION_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16.
+ *
+ * @param[in] N The number of vectors in the block
+ * @param[in] ACTIVATION_TYPE The type of the activation
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] A_VAL Additional value required by the activation
+ * @param[in] B_VAL Additional value required by the activation
+ * @{
+ */
+#define ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
+#define ACTIVATION_BLOCK(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
+/** @} */ // end of group ACTIVATION_BLOCK
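+
+// Illustrative sketch (not part of the original helpers):
+//   ACTIVATION_BLOCK(2, RELU, float, c, A_VAL, B_VAL)
+// expands to
+//   c0 = ACTIVATION(RELU, float, c0, A_VAL, B_VAL);
+//   c1 = ACTIVATION(RELU, float, c1, A_VAL, B_VAL);
+// with ACTIVATION itself coming from activation_float_helpers.h; RELU, A_VAL and B_VAL are
+// placeholders for whatever the caller passes.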
+
+/** Apply convert_<data_type> to the given variables
+ * @name CONVERT_ROW_n
+ *
+ * @param[in] N The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME_SRC The basename of the source variables
+ * @param[in] BASENAME_DST The basename of the destination variables
+ */
+#define CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##0 = CONVERT(BASENAME_SRC##0, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##1 = CONVERT(BASENAME_SRC##1, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##2 = CONVERT(BASENAME_SRC##2, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##3 = CONVERT(BASENAME_SRC##3, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##4 = CONVERT(BASENAME_SRC##4, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##5 = CONVERT(BASENAME_SRC##5, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##6 = CONVERT(BASENAME_SRC##6, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##7 = CONVERT(BASENAME_SRC##7, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##8 = CONVERT(BASENAME_SRC##8, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##9 = CONVERT(BASENAME_SRC##9, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##A = CONVERT(BASENAME_SRC##A, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##B = CONVERT(BASENAME_SRC##B, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##C = CONVERT(BASENAME_SRC##C, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##D = CONVERT(BASENAME_SRC##D, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##E = CONVERT(BASENAME_SRC##E, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_16(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##F = CONVERT(BASENAME_SRC##F, VEC_DATA_TYPE(DATA_TYPE, N));
+/** @} */ // end of group CONVERT_ROW_n
+
+/** Apply convert_<data_type> to a block (BASENAME_SRC) and save to another block (BASENAME_DST)
+ * @name CONVERT_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16.
+ *
+ * @param[in] M The number of vectors to convert
+ * @param[in] N The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME_SRC The basename of the source variables
+ * @param[in] BASENAME_DST The basename of the destination variables
+ */
+#define CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_##M(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
+#define CONVERT_BLOCK(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
+/** @} */ // end of group CONVERT_BLOCK
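+
+// Illustrative sketch (not part of the original helpers): CONVERT_BLOCK(2, 4, half, c, c_h)
+// expands roughly to
+//   half4 c_h0 = convert_half4(c0);
+//   half4 c_h1 = convert_half4(c1);
+// where c_h is a hypothetical destination basename.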
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "gemm_helpers.h"
+#include "helpers_asymm.h"
+#include "repeat.h"
+
+#if defined(DATA_TYPE) && defined(ACC_DATA_TYPE)
+
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \
+ defined(cl_arm_integer_dot_product_accumulate_int8)
+#define ARM_DOT(x, y, val) val = arm_dot_acc((x), (y), (val));
+#else // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
+ // defined(cl_arm_integer_dot_product_accumulate_int8)
+#define ARM_DOT(x, y, val) val += arm_dot((x), (y));
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
+ // defined(cl_arm_integer_dot_product_accumulate_int8)
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+
+/** Specialized macros to perform the dot product instruction between two vectors of size N [1,16].
+ * These macros use the dot8 instruction */
+#define ARM_DOT1(a, b, c) \
+ ({ \
+ ARM_DOT((VEC_DATA_TYPE(DATA_TYPE, 4))(a, (VEC_DATA_TYPE(DATA_TYPE, 3))0), \
+ (VEC_DATA_TYPE(DATA_TYPE, 4))(b, (VEC_DATA_TYPE(DATA_TYPE, 3))0), c); \
+ })
+#define ARM_DOT2(a, b, c) \
+ ({ \
+ ARM_DOT((VEC_DATA_TYPE(DATA_TYPE, 4))(a, (VEC_DATA_TYPE(DATA_TYPE, 2))0), \
+ (VEC_DATA_TYPE(DATA_TYPE, 4))(b, (VEC_DATA_TYPE(DATA_TYPE, 2))0), c); \
+ })
+#define ARM_DOT3(a, b, c) \
+ ({ \
+ ARM_DOT((VEC_DATA_TYPE(DATA_TYPE, 4))(a, (DATA_TYPE)0), \
+ (VEC_DATA_TYPE(DATA_TYPE, 4))(b, (DATA_TYPE)0), c); \
+ })
+#define ARM_DOT4(a, b, c) ({ ARM_DOT(a, b, c); })
+#define ARM_DOT8(a, b, c) \
+ ({ \
+ ARM_DOT4((a.lo), (b.lo), c); \
+ ARM_DOT4((a.hi), (b.hi), c); \
+ })
+#define ARM_DOT16(a, b, c) \
+ ({ \
+ ARM_DOT8((a.lo), (b.lo), c); \
+ ARM_DOT8((a.hi), (b.hi), c); \
+ })
+
+#else // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+
+/** Specialized macros to perform the dot product instruction between two vectors of size K0 [1,16]
+ * without using the dot8 instruction. */
+#define ARM_DOT1(a, b, c) ({ c += (ACC_DATA_TYPE)a * b; })
+#define ARM_DOT2(a, b, c) \
+ ({ \
+ c += (ACC_DATA_TYPE)a.s0 * b.s0; \
+ c += (ACC_DATA_TYPE)a.s1 * b.s1; \
+ })
+#define ARM_DOT3(a, b, c) \
+ ({ \
+ ARM_DOT2(a, b, c); \
+ c += (ACC_DATA_TYPE)a.s2 * b.s2; \
+ })
+#define ARM_DOT4(a, b, c) \
+ ({ \
+ ARM_DOT3(a, b, c); \
+ c += (ACC_DATA_TYPE)a.s3 * b.s3; \
+ })
+#define ARM_DOT8(a, b, c) \
+ ({ \
+ ARM_DOT4((a.lo), (b.lo), c); \
+ ARM_DOT4((a.hi), (b.hi), c); \
+ })
+#define ARM_DOT16(a, b, c) \
+ ({ \
+ ARM_DOT8((a.lo), (b.lo), c); \
+ ARM_DOT8((a.hi), (b.hi), c); \
+ })
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
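+
+// Illustrative sketch (not part of the original kernel source): in the fallback path above
+// (no dot8 extension), ARM_DOT4(a, b, c) with e.g. char4 operands and ACC_DATA_TYPE=int
+// accumulates
+//   c += (int)a.s0 * b.s0;
+//   c += (int)a.s1 * b.s1;
+//   c += (int)a.s2 * b.s2;
+//   c += (int)a.s3 * b.s3;
+// while the dot8 path folds the same computation into arm_dot()/arm_dot_acc().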
+
+/** Specialized macros to perform a broadcast dot product operation between one vector "a" and N0
+ * vectors "b" of size K0 [1,16] */
+#define ARM_DOT_K0X1(k0, a, b, c) ({ ARM_DOT_K0(k0, (a), (b##0), (c)); })
+#define ARM_DOT_K0X2(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0(k0, (a), (b##0), (c.s0)); \
+ ARM_DOT_K0(k0, (a), (b##1), (c.s1)); \
+ })
+#define ARM_DOT_K0X3(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X2(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##2), (c.s2)); \
+ })
+#define ARM_DOT_K0X4(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X3(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##3), (c.s3)); \
+ })
+#define ARM_DOT_K0X8(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X4(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##4), (c.s4)); \
+ ARM_DOT_K0(k0, (a), (b##5), (c.s5)); \
+ ARM_DOT_K0(k0, (a), (b##6), (c.s6)); \
+ ARM_DOT_K0(k0, (a), (b##7), (c.s7)); \
+ })
+#define ARM_DOT_K0X16(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X8(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##8), (c.s8)); \
+ ARM_DOT_K0(k0, (a), (b##9), (c.s9)); \
+ ARM_DOT_K0(k0, (a), (b##A), (c.sA)); \
+ ARM_DOT_K0(k0, (a), (b##B), (c.sB)); \
+ ARM_DOT_K0(k0, (a), (b##C), (c.sC)); \
+ ARM_DOT_K0(k0, (a), (b##D), (c.sD)); \
+ ARM_DOT_K0(k0, (a), (b##E), (c.sE)); \
+ ARM_DOT_K0(k0, (a), (b##F), (c.sF)); \
+ })
+
+/** Specialized macros to perform a partial matrix multiplication with dimensions M0,N0,K0 */
+#define ARM_MM_K0XN0X1(n0, k0, a, b, c) ({ ARM_DOT_K0XN0(n0, k0, (a##0), b, (c##0)); })
+#define ARM_MM_K0XN0X2(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X1(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##1), b, (c##1)); \
+ })
+#define ARM_MM_K0XN0X3(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X2(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##2), b, (c##2)); \
+ })
+#define ARM_MM_K0XN0X4(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X3(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##3), b, (c##3)); \
+ })
+#define ARM_MM_K0XN0X5(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X4(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##4), b, (c##4)); \
+ })
+#define ARM_MM_K0XN0X6(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X5(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##5), b, (c##5)); \
+ })
+#define ARM_MM_K0XN0X7(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X6(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##6), b, (c##6)); \
+ })
+#define ARM_MM_K0XN0X8(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X7(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##7), b, (c##7)); \
+ })
+
+#define ARM_DOT_K0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b), (c)); \
+ })
+
+#define ARM_DOT_K0XN0(n0, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT_K0X, n0) \
+ (k0, (a), b, (c)); \
+ })
+
+#define ARM_MM_K0XN0XM0(m0, n0, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_MM_K0XN0X, m0) \
+ (n0, k0, a, b, c); \
+ })
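+
+/* Illustrative expansion (the names a0.., b0.., c0.. refer to the per-block variables created by
+ * LOAD_BLOCK / REPEAT_VAR_INIT_TO_CONST in the kernels below): for M0=2, N0=4, K0=8,
+ *
+ *   ARM_MM_K0XN0XM0(2, 4, 8, a, b, c);
+ *
+ * expands via CONCAT to ARM_DOT8(a0, b0, c0.s0) ... ARM_DOT8(a0, b3, c0.s3) followed by
+ * ARM_DOT8(a1, b0, c1.s0) ... ARM_DOT8(a1, b3, c1.s3), i.e. every row block a<m> is dotted
+ * with every column block b<n> and accumulated into c<m>.s<n>. */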
+
+/** Specialized macros to perform a broadcast multiplication between one vector "a" of size K0
+ * [1,16] and K0 vectors "b" of size N0, accumulating into a vector "c" of size N0 */
+#define ARM_MUL_N0X1(VECTOR_ACC_TYPE, a, b, c) ({ c += CONVERT(b##0, VECTOR_ACC_TYPE) * a; })
+#define ARM_MUL_N0X2(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ c += CONVERT(b##0, VECTOR_ACC_TYPE) * a.s##0; \
+ c += CONVERT(b##1, VECTOR_ACC_TYPE) * a.s##1; \
+ })
+#define ARM_MUL_N0X3(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X2(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##2, VECTOR_ACC_TYPE) * a.s##2; \
+ })
+#define ARM_MUL_N0X4(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X3(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##3, VECTOR_ACC_TYPE) * a.s##3; \
+ })
+#define ARM_MUL_N0X8(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X4(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##4, VECTOR_ACC_TYPE) * a.s##4; \
+ c += CONVERT(b##5, VECTOR_ACC_TYPE) * a.s##5; \
+ c += CONVERT(b##6, VECTOR_ACC_TYPE) * a.s##6; \
+ c += CONVERT(b##7, VECTOR_ACC_TYPE) * a.s##7; \
+ })
+#define ARM_MUL_N0X16(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X8(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##8, VECTOR_ACC_TYPE) * a.s##8; \
+ c += CONVERT(b##9, VECTOR_ACC_TYPE) * a.s##9; \
+ c += CONVERT(b##A, VECTOR_ACC_TYPE) * a.s##A; \
+ c += CONVERT(b##B, VECTOR_ACC_TYPE) * a.s##B; \
+ c += CONVERT(b##C, VECTOR_ACC_TYPE) * a.s##C; \
+ c += CONVERT(b##D, VECTOR_ACC_TYPE) * a.s##D; \
+ c += CONVERT(b##E, VECTOR_ACC_TYPE) * a.s##E; \
+ c += CONVERT(b##F, VECTOR_ACC_TYPE) * a.s##F; \
+ })
+/** Specialized macros to perform a partial matrix multiplication with dimensions M0,N0,K0 */
+#define ARM_MM_NATIVE_N0XK0X1(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##0), b, (c##0)); })
+#define ARM_MM_NATIVE_N0XK0X2(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X1(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##1), b, (c##1)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X3(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X2(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##2), b, (c##2)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X4(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X3(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##3), b, (c##3)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X5(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X4(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##4), b, (c##4)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X6(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X5(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##5), b, (c##5)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X7(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X6(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##6), b, (c##6)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X8(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X7(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##7), b, (c##7)); \
+ })
+#define ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_MUL_N0X, k0) \
+ (VECTOR_ACC_TYPE, (a), b, (c)); \
+ })
+#define ARM_MM_NATIVE_N0XK0XM0(VECTOR_ACC_TYPE, m0, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_MM_NATIVE_N0XK0X, m0) \
+ (VECTOR_ACC_TYPE, k0, a, b, c); \
+ })
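+
+/* Illustrative expansion (values chosen for the example only; T denotes the given vector
+ * accumulator type): for M0=2 and K0=4,
+ *
+ *   ARM_MM_NATIVE_N0XK0XM0(VEC_DATA_TYPE(ACC_DATA_TYPE, N0), 2, 4, a, b, c);
+ *
+ * expands to c0 += CONVERT(b0, T) * a0.s0; ... c0 += CONVERT(b3, T) * a0.s3; and the same chain
+ * for a1/c1, where each b<k> holds N0 RHS values and the scalar a<m>.s<k> is broadcast across it. */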
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(M) && \
+ defined(N)
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices with
+ * QASYMM8/QASYMM8_SIGNED data type. The LHS matrix must be reshaped with @ref
+ * CLGEMMReshapeLHSMatrixKernel and the M0xK0 block must NOT be transposed. The RHS matrix must be
+ * reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the K0xN0 block must be transposed.
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions M and N must be passed at compile time using -DM and -DN (i.e. -DM=52
+ * and -DN=90).
+ * @note The block's dimensions used for reshaping the LHS matrix and the RHS matrix (M0, N0 and K0)
+ * must be passed at compile time using -DM0, -DN0 and -DK0 (i.e. -DM0=4, -DN0=8, -DK0=4).
+ * @note The number of M0xK0 vertical blocks stored on the same output row of the reshaped LHS
+ * matrix must be passed at compile time using -DV0 (i.e. -DV0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (i.e. -DH0=2)
+ * @note If the M0xK0 blocks in the reshaped LHS matrix have been interleaved, the option
+ * -DLHS_INTERLEAVE must be passed at compile time.
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - V0 >= 1
+ * - H0 >= 1
+ *
+ * @note In case the output has to be reinterpreted as a 3D tensor (i.e. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix NOT reshaped
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS reshaped
+ * matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS reshaped
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] k Number of columns in LHS matrix and rows in RHS
+ * matrix not reshaped.
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+ IMAGE_DECLARATION(dst), uint k, uint lhs_stride_z,
+ uint rhs_stride_z, uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define LHS_BLOCK_SIZE ((K0) * (M0))
+
+#if defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (K0)
+#define LHS_STEP_X ((K0) * (V0))
+#define LHS_STEP_LOOP (1)
+#else // defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (LHS_BLOCK_SIZE)
+#define LHS_STEP_X (K0)
+#define LHS_STEP_LOOP (V0)
+#endif // defined(LHS_INTERLEAVE)
+
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ __global DATA_TYPE *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes +
+ (y % V0) * (uint)LHS_OFFSET_X + (y / V0) * (uint)lhs_stride_y +
+ (z * lhs_stride_z);
+
+ // Compute RHS matrix address
+ __global DATA_TYPE *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X + (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_addr += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_addr += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ for (int i = 0; i < k; i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_addr, 0, LHS_STEP_X, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_addr, 0, RHS_STEP_X, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b, c);
+
+ // Update address
+ lhs_addr += (M0 * LHS_STEP_X * LHS_STEP_LOOP);
+ rhs_addr += (N0 * RHS_STEP_X * RHS_STEP_LOOP);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(int)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert and store output block
+ CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+
+#undef LHS_BLOCK_SIZE
+#undef LHS_OFFSET_X
+#undef LHS_STEP_X
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
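+
+/* Example build options for the kernel above (illustrative values only; any configuration allowed
+ * by the notes in the documentation is valid):
+ *   -DDATA_TYPE=uchar -DACC_DATA_TYPE=uint -DM=64 -DN=64
+ *   -DM0=4 -DN0=4 -DK0=16 -DV0=2 -DH0=2 -DLHS_INTERLEAVE -DRHS_INTERLEAVE */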
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(M) &&
+       // defined(N)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(K)
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS matrix is reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is
+ * transposed
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (i.e. -DK=64)
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (i.e. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (i.e. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (i.e. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS reshaped
+ * matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS reshaped
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix in
+ * unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+ IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z, uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + (x % H0) * (uint)RHS_OFFSET_X +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ for (int i = 0; i < K; i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b, c);
+
+ lhs_offset += K0;
+ rhs_offset += N0 * RHS_STEP_X * RHS_STEP_LOOP;
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(int) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert and store output block
+ CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
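+
+/* Example build options for the kernel above (illustrative values only):
+ *   -DDATA_TYPE=uchar -DACC_DATA_TYPE=uint -DK=128 -DM0=4 -DN0=8 -DK0=4 -DH0=2 -DRHS_INTERLEAVE
+ * When dummy work-items are dispatched, -DDUMMY_WORK_ITEMS together with -DM and -DN must be
+ * added as well. */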
+
+#if defined(RESULT_OFFSET) && defined(RESULT_SHIFT) && defined(RESULT_MULTIPLIER)
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices with fused output stage
+ * using fixed-point arithmetic. The LHS matrix is NOT reshaped. The RHS matrix is reshaped with
+ * @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is transposed.
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (i.e. -DK=64)
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (i.e. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (i.e. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (i.e. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @note The offset, scalar scale factor and number of bits to shift right of output tensor must be
+ * passed at compile time using -DRESULT_OFFSET, -DRESULT_MULTIPLIER and -DRESULT_SHIFT
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ * @note In case of per-channel quantization of matrix B, -DPER_CHANNEL_QUANTIZATION must be passed
+ * at compile time.
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix.
+ * Supported data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in
+ * X dimension (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in
+ * Y dimension (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in
+ * the LHS reshaped matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix.
+ * Supported data type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in
+ * X dimension (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in
+ * Y dimension (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in
+ * the RHS reshaped matrix
+ * @param[out] dst_ptr Pointer to the destination matrix
+ * Supported data type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in
+ * X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in
+ * Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in
+ * the destination matrix
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in
+ * Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in
+ * Z dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in
+ * Z dimension (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS
+ * matrix in unit of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the
+ * output matrix in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ * @param[in] sum_col_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: S32
+ * @param[in] sum_col_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: S32
+ * @param[in] sum_row_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases
+ * tensor. Supported data type: S32
+ * @param[in] biases_stride_x (Optional) Stride of the biases
+ * tensor in X dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the biases tensor
+ * @param[in] result_multipliers_ptr (Optional) Pointer to the output
+ * multipliers vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_multipliers_stride_x (Optional) Stride of the output
+ * multipliers vector in X dimension (in bytes)
+ * @param[in] result_multipliers_step_x (Optional)
+ * output_multipliers_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_multipliers_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output multipliers vector
+ * @param[in] result_shifts_ptr (Optional) Pointer to the output
+ * shifts vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_shifts_stride_x (Optional) Stride of the output
+ * shifts vector in X dimension (in bytes)
+ * @param[in] result_shifts_step_x (Optional) output_shifts_stride_x *
+ * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_shifts_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output shifts vector
+ */
+__kernel void gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint(
+ IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs), IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z, uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ VECTOR_DECLARATION(biases)
+#endif // defined(ADD_BIAS)
+#if defined(PER_CHANNEL_QUANTIZATION)
+ ,
+ VECTOR_DECLARATION(result_multipliers), VECTOR_DECLARATION(result_shifts)
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + (x % H0) * (uint)RHS_OFFSET_X +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ for (int i = 0; i < K; i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b, c);
+
+ lhs_offset += K0;
+ rhs_offset += N0 * RHS_STEP_X * RHS_STEP_LOOP;
+ }
+
+ // The final result (after the fused output stage) is stored as DATA_TYPE elements
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(DATA_TYPE) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert result of matrix multiplication to S32
+ REPEAT_VAR_INIT_CONVERT_SAT(M0, VEC_DATA_TYPE(int, N0), c, c_int);
+
+ int batch_id = z;
+#if defined(DEPTH_GEMM3D)
+ batch_id /= (int)DEPTH_GEMM3D;
+#endif // defined(DEPTH_GEMM3D)
+
+ // Offset contribution: c += (A_OFFSET * sum_col) + (B_OFFSET * sum_row) + K_OFFSET;
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(int, N0), offset_s32_, K_OFFSET);
+
+#if defined(A_OFFSET)
+ // Compute the offset contribution due to A_OFFSET
+ __global uchar *sum_col_addr =
+ sum_col_ptr + sum_col_offset_first_element_in_bytes + (x * (uint)N0) * sizeof(int);
+
+#if defined(SUM_COL_HAS_BATCHES)
+ sum_col_addr += z * sum_col_stride_y;
+#endif // defined(SUM_COL_HAS_BATCHES)
+ VEC_DATA_TYPE(int, N0)
+ a_offset_s32 = VLOAD(N0)(0, (__global int *)sum_col_addr);
+ a_offset_s32 *= (VEC_DATA_TYPE(int, N0))A_OFFSET;
+
+ REPEAT_ADD_VECTOR_TO_VAR(M0, offset_s32_, a_offset_s32);
+#endif // defined(A_OFFSET)
+
+#if defined(B_OFFSET)
+ // Compute the offset contribution due to B_OFFSET
+ __global uchar *sum_row_addr = sum_row_ptr + sum_row_offset_first_element_in_bytes +
+ (y * (uint)M0) * sizeof(int) + z * sum_row_stride_y;
+
+#if defined(HEIGHT_GEMM3D) && defined(DEPTH_GEMM3D)
+ sum_row_addr += (batch_id % (int)DEPTH_GEMM3D) * (int)HEIGHT_GEMM3D * sizeof(int);
+#endif // defined(HEIGHT_GEMM3D) && defined(DEPTH_GEMM3D)
+ LOAD_SCALAR_AS_VECTOR(M0, N0, int, b_offset_s32_, sum_row_addr, 0, sum_row_stride_x);
+
+ REPEAT_MLA_VAR_WITH_CONST_VEC(M0, offset_s32_, b_offset_s32_, (VEC_DATA_TYPE(int, N0))B_OFFSET);
+#endif // defined(B_OFFSET)
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr =
+ biases_ptr + biases_offset_first_element_in_bytes + (x * (uint)N0) * sizeof(int);
+
+ VEC_DATA_TYPE(int, N0)
+ bias_values = VLOAD(N0)(0, (__global int *)bias_addr);
+ REPEAT_ADD_VECTOR_TO_VAR(M0, offset_s32_, bias_values);
+#endif // defined(ADD_BIAS)
+
+ REPEAT_ADD_TWO_VARS(M0, c_int, offset_s32_);
+
+ // Multiply by result_mult_int and shift
+#if defined(PER_CHANNEL_QUANTIZATION)
+ __global uchar *result_multipliers_addr = result_multipliers_ptr +
+ result_multipliers_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(int);
+ __global uchar *result_shifts_addr =
+ result_shifts_ptr + result_shifts_offset_first_element_in_bytes + (x * (uint)N0) * sizeof(int);
+
+ VEC_DATA_TYPE(int, N0)
+ res_mul = VLOAD(N0)(0, (__global int *)result_multipliers_addr);
+ VEC_DATA_TYPE(int, N0)
+ res_shift = VLOAD(N0)(0, (__global int *)result_shifts_addr);
+
+ REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL(M0, N0, c_int, res_mul, res_shift);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+
+#if RESULT_SHIFT < 0
+ REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(M0, N0, c_int, RESULT_MULTIPLIER,
+ RESULT_SHIFT);
+#else // RESULT_SHIFT >= 0
+ REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(M0, N0, c_int, RESULT_MULTIPLIER,
+ RESULT_SHIFT);
+#endif // RESULT_SHIFT < 0
+
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+
+ // Add the offset terms to GEMM's result
+ REPEAT_ADD_CONST_TO_VAR(M0, VEC_DATA_TYPE(int, N0), c_int, RESULT_OFFSET);
+
+#if defined(MIN_BOUND)
+ REPEAT_MAX_CONST_VAR(M0, VEC_DATA_TYPE(int, N0), c_int, MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ REPEAT_MIN_CONST_VAR(M0, VEC_DATA_TYPE(int, N0), c_int, MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Convert (with saturation) and store the output block
+ CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, c_int, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
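+
+/* Schematic summary of the fused output stage above, per accumulator lane and ignoring the
+ * optional per-channel path (requantize() is a placeholder name for the
+ * REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_* helpers):
+ *
+ *   acc = c_int + K_OFFSET + A_OFFSET * sum_col[x] + B_OFFSET * sum_row[y] + bias[x];
+ *   acc = requantize(acc, RESULT_MULTIPLIER, RESULT_SHIFT) + RESULT_OFFSET;
+ *   out = convert_saturate(clamp(acc, MIN_BOUND, MAX_BOUND));
+ */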
+#endif // defined(RESULT_OFFSET) && defined(RESULT_SHIFT) && defined(RESULT_MULTIPLIER)
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(K)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(K)
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS matrix is NOT reshaped
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (i.e. -DK=64)
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (i.e. -DM0=2)
+ * @note The number of N0 columns to process must be passed at compile time using -DN0 (i.e. -DN0=2)
+ * @note The number of K0 partial accumulations must be passed at compile time using -DK0 (i.e.,
+ * -DK0=2)
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: QASYMM8
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS reshaped
+ * matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS reshaped
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix in
+ * unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+ IMAGE_DECLARATION(dst), uint lhs_stride_z, uint rhs_stride_z,
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + x * N0;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0);
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+
+ for (; i <= (K - K0); i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(K0, N0, DATA_TYPE, b, rhs_ptr, rhs_offset, rhs_stride_y, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+ ARM_MM_NATIVE_N0XK0XM0(VEC_DATA_TYPE(ACC_DATA_TYPE, N0), M0, K0, a, b, c);
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+ // Transpose the values from RHS matrix
+ TRANSPOSE_K0XN0(K0, N0, b_t, b, DATA_TYPE);
+
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b_t, c);
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+ // Update the offset
+ lhs_offset += K0;
+ rhs_offset += K0 * rhs_stride_y;
+ }
+
+ // Left-over for loop
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, 1, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(1, N0, DATA_TYPE, b, rhs_ptr, rhs_offset, rhs_stride_y, zrhs);
+
+ // Partial matrix multiplication M0,N0,1
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+ ARM_MM_NATIVE_N0XK0XM0(VEC_DATA_TYPE(ACC_DATA_TYPE, N0), M0, 1, a, b, c);
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+ // Transpose the values from RHS matrix
+ TRANSPOSE_K0XN0(1, N0, b_t, b, DATA_TYPE);
+
+ ARM_MM_K0XN0XM0(M0, N0, 1, a, b_t, c);
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+ // Update the offset
+ lhs_offset += 1;
+ rhs_offset += rhs_stride_y;
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(int) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0); // uint zout0=0,zout1=0,... zout(M0-1)=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert and store output block
+ CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+}
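+
+/* Example build options for the native kernel above (illustrative values only):
+ *   -DDATA_TYPE=uchar -DACC_DATA_TYPE=uint -DK=96 -DM0=4 -DN0=4 -DK0=4
+ * GPU_ARCH / GPU_ARCH_MIDGARD are assumed to be provided by the common helpers or build options. */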
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(K)
+
+#if defined(COLS_A)
+/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix
+ * A. It is also possible to multiply each reduced row by a scalar value, if SCALAR is passed at
+ * compile time.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ *
+ * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE
+ * (i.e. -DACC_DATA_TYPE=uint)
+ * @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (e.g.
+ * -DSCALAR=3)
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type:
+ * QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination tensor Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * tensor
+ */
+__kernel void gemmlowp_matrix_a_reduction(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ VEC_DATA_TYPE(ACC_DATA_TYPE, 4)
+ sum_row_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))0;
+ ACC_DATA_TYPE sum_row = 0;
+
+ __global const DATA_TYPE *matrix_a =
+ (__global const DATA_TYPE *)(src.ptr + get_global_id(0) * src_stride_y +
+ get_global_id(1) * src_stride_z);
+
+ int i = 0;
+
+ // This for loop performs 16 accumulations
+ for (; i <= ((int)COLS_A - 16); i += 16)
+ {
+ const VEC_DATA_TYPE(DATA_TYPE, 16) a0 = vload16(0, matrix_a + i);
+
+ sum_row_32 += CONVERT(a0.s0123, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) +
+ CONVERT(a0.s4567, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) +
+ CONVERT(a0.s89AB, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) +
+ CONVERT(a0.sCDEF, VEC_DATA_TYPE(ACC_DATA_TYPE, 4));
+ }
+
+ // This for loop performs the leftover accumulations
+ for (; i < COLS_A; ++i)
+ {
+ sum_row += (ACC_DATA_TYPE)matrix_a[i];
+ }
+
+ sum_row += sum_row_32.s0 + sum_row_32.s1 + sum_row_32.s2 + sum_row_32.s3;
+
+#if defined(SCALAR)
+ sum_row *= (int)SCALAR;
+#endif // defined(SCALAR)
+ *((__global int *)dst.ptr) = (int)sum_row;
+}
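+
+/* In other words, for each row y of matrix A the kernel above computes (illustrative notation):
+ *
+ *   sum_row[y] = SCALAR * (A[y][0] + A[y][1] + ... + A[y][COLS_A - 1])
+ *
+ * with the SCALAR factor applied only when -DSCALAR is defined. The result feeds the B_OFFSET
+ * contribution in the offset-correction code below. */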
+
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A
+ * using the arm dot product instruction. It is also possible to multiply each reduced row by a
+ * scalar value, if SCALAR is passed at compile time.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ *
+ * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE
+ * (i.e. -DACC_DATA_TYPE=uint)
+ * @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (e.g.
+ * -DSCALAR=3)
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type:
+ * QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination tensor Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * tensor
+ */
+__kernel void gemmlowp_matrix_a_reduction_dot8(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ ACC_DATA_TYPE sum_row = 0;
+
+ __global const DATA_TYPE *matrix_a =
+ (__global const DATA_TYPE *)(src.ptr + get_global_id(0) * src_stride_y +
+ get_global_id(1) * src_stride_z);
+
+ int i = 0;
+
+ // This for loop performs 32 accumulations per iteration
+ for (; i <= ((int)COLS_A - 32); i += 32)
+ {
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ a0 = vload16(0, matrix_a + i);
+
+ sum_row += arm_dot(a0.s0123, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s4567, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s89AB, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.sCDEF, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+
+ a0 = vload16(1, matrix_a + i);
+
+ sum_row += arm_dot(a0.s0123, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s4567, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s89AB, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.sCDEF, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ }
+
+ // This for loop performs the leftover accumulations
+ for (; i < COLS_A; ++i)
+ {
+ sum_row += (ACC_DATA_TYPE)matrix_a[i];
+ }
+
+#if defined(SCALAR)
+ sum_row *= (int)SCALAR;
+#endif // defined(SCALAR)
+ *((__global int *)dst.ptr) = (int)sum_row;
+}
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+#endif // defined(COLS_A)
+
+#if defined(COLS_B) && defined(ROWS_B)
+/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each column of
+ * Matrix B. It is also possible to multiply each reduced column by a scalar value, if SCALAR is
+ * passed at compile time.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ *
+ * @attention The number of matrix B columns and rows needs to be passed at compile time using
+ * -DCOLS_B and -DROWS_B
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE
+ * (i.e. -DACC_DATA_TYPE=uint)
+ * @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (i.e.
+ * -DSCALAR=3)
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type:
+ * QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination tensor Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * tensor
+ */
+__kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ VEC_DATA_TYPE(ACC_DATA_TYPE, 16)
+ sum_col_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 16))0;
+
+ __global const DATA_TYPE *matrix_b =
+ (__global const DATA_TYPE *)(src.ptr + get_global_id(1) * src_stride_z);
+
+ int i = 0;
+ // This for loop performs 4 accumulations
+ for (; i <= ((int)ROWS_B - 4); i += 4)
+ {
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b0 = vload16(0, matrix_b + 0 * src_stride_y);
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b1 = vload16(0, matrix_b + 1 * src_stride_y);
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b2 = vload16(0, matrix_b + 2 * src_stride_y);
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b3 = vload16(0, matrix_b + 3 * src_stride_y);
+
+ sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) +
+ CONVERT(b1, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) +
+ CONVERT(b2, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) +
+ CONVERT(b3, VEC_DATA_TYPE(ACC_DATA_TYPE, 16));
+
+ matrix_b += 4 * src_stride_y;
+ }
+
+ // This for loop performs the leftover accumulations
+ for (; i < (int)ROWS_B; ++i)
+ {
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b0 = vload16(0, matrix_b);
+
+ sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, 16));
+
+ matrix_b += src_stride_y;
+ }
+
+#if defined(SCALAR)
+ sum_col_32 *= (VEC_DATA_TYPE(ACC_DATA_TYPE, 16))SCALAR;
+#endif // defined(SCALAR)
+ VSTORE(16)
+ (convert_int16(sum_col_32), 0, (__global int *)dst.ptr);
+}
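+
+/* In other words, for each column x of matrix B the kernel above computes (illustrative notation):
+ *
+ *   sum_col[x] = SCALAR * (B[0][x] + B[1][x] + ... + B[ROWS_B - 1][x])
+ *
+ * processing 16 consecutive columns per work-item. The result feeds the A_OFFSET contribution in
+ * the offset-correction code below. */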
+#endif // defined(COLS_B) && defined(ROWS_B)
+
+#endif // defined(DATA_TYPE) && defined(ACC_DATA_TYPE)
+
+#if defined(K_OFFSET)
+
+/** Helper function used to calculate the offset contribution after matrix multiplication.
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication),
+ * and calculates the offset contribution of matrix A and matrix B.
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time.
+ * Usually, if gemmlowp is used to accelerate a convolution layer, sum_col will not have batches
+ *
+ * @param[in] x get_global_id(0) * 4
+ * @param[in] y get_global_id(1)
+ * @param[in] z get_global_id(2)
+ * @param[in] sum_col_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ */
+inline int4 offset_contribution(int x, int y, int z
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ VECTOR_DECLARATION(biases)
+#endif // defined(ADD_BIAS)
+)
+{
+ int4 a_offset_s32 = (int4)0;
+ int4 b_offset_s32 = (int4)0;
+
+ int batch_id = z;
+#if defined(DEPTH_INPUT3D)
+ batch_id /= (int)DEPTH_INPUT3D;
+#endif // defined(DEPTH_INPUT3D)
+
+#if defined(A_OFFSET)
+  // Compute the address of sum_col, used for the offset contribution due to A_OFFSET
+ __global uchar *sum_col_addr =
+ sum_col_ptr + sum_col_offset_first_element_in_bytes + x * sizeof(int);
+
+ // Compute the offset contribution due to A_OFFSET
+#if defined(SUM_COL_HAS_BATCHES)
+ a_offset_s32 = vload4(0, (__global int *)(sum_col_addr + batch_id * sum_col_stride_y));
+#else // defined(SUM_COL_HAS_BATCHES)
+ a_offset_s32 = vload4(0, (__global int *)sum_col_addr);
+#endif // defined(SUM_COL_HAS_BATCHES)
+
+ a_offset_s32 *= (int4)A_OFFSET;
+#endif // defined(A_OFFSET)
+
+#if defined(B_OFFSET)
+  // Compute the address of sum_row, used for the offset contribution due to B_OFFSET
+ __global uchar *sum_row_addr =
+ sum_row_ptr + sum_row_offset_first_element_in_bytes + y * sizeof(int);
+
+ // Compute the offset contribution due to B_OFFSET
+#if defined(HEIGHT_INPUT3D) && defined(DEPTH_INPUT3D)
+ b_offset_s32 = (int4) * (((__global int *)(sum_row_addr + batch_id * sum_row_stride_y)) +
+ (z % (int)DEPTH_INPUT3D) * (int)HEIGHT_INPUT3D);
+#else // defined(HEIGHT_INPUT3D) && defined(DEPTH_INPUT3D)
+ b_offset_s32 = (int4) * (((__global int *)(sum_row_addr + batch_id * sum_row_stride_y)));
+#endif // defined(HEIGHT_INPUT3D) && defined(DEPTH_INPUT3D)
+ b_offset_s32 *= (int4)B_OFFSET;
+#endif // defined(B_OFFSET)
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ b_offset_s32 += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ return (int4)K_OFFSET + a_offset_s32 + b_offset_s32;
+}
+
+/* OpenCL kernel used to add the offset contribution after matrix multiplication. The computation is
+ * performed in-place
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication),
+ * and adds to it the offset contribution of matrix A and matrix B in-place.
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time.
+ * Usually, if gemmlowp is used to accelerate a convolution layer, sum_col will not have batches
+ *
+ * The final result is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (sum_col[k] * A_OFFSET) +
+ * (sum_row[i] * B_OFFSET) +
+ * (K_OFFSET)
+ *
+ * @param[in] mm_result_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] mm_result_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] mm_result_step_x mm_result_stride_x * number of elements along
+ * X processed per workitem(in bytes)
+ * @param[in] mm_result_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] mm_result_step_y mm_result_stride_y * number of elements along
+ * Y processed per workitem(in bytes)
+ * @param[in] mm_result_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] mm_result_step_z mm_result_stride_z * number of elements along
+ * Z processed per workitem(in bytes)
+ * @param[in] mm_result_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] sum_col_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ */
+__kernel void gemmlowp_offset_contribution(TENSOR3D_DECLARATION(mm_result)
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ VECTOR_DECLARATION(biases)
+#endif // defined(ADD_BIAS)
+)
+{
+ const int x = get_global_id(0) * 4;
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ // Compute offset contribution
+ int4 offset_term_s32 = offset_contribution(
+ x, y, z
+#if defined(A_OFFSET)
+ ,
+ sum_col_ptr, sum_col_stride_x, sum_col_step_x, sum_col_stride_y, sum_col_step_y,
+ sum_col_offset_first_element_in_bytes
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ sum_row_ptr, sum_row_stride_x, sum_row_step_x, sum_row_stride_y, sum_row_step_y,
+ sum_row_offset_first_element_in_bytes
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ biases_ptr, biases_stride_x, biases_step_x, biases_offset_first_element_in_bytes
+#endif // defined(ADD_BIAS)
+ );
+
+ __global uchar *mm_result_addr = mm_result_ptr + mm_result_offset_first_element_in_bytes +
+ x * sizeof(int) + y * mm_result_stride_y +
+ z * mm_result_stride_z;
+
+ int4 in_s32 = vload4(0, (__global int *)mm_result_addr);
+
+ // Add the offset terms to GEMM's result
+ in_s32 += offset_term_s32;
+
+ // Store the result with the offset contribution
+ vstore4(in_s32, 0, (__global int *)mm_result_addr);
+}
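+
+/* A host-side C sketch of the in-place correction applied above, for documentation only. The
+ * function name and the dense row-major layout are assumptions; in the usual GEMMLowp setup sum_col
+ * holds the column sums of matrix B, sum_row the row sums of matrix A, and
+ * k_offset = a_offset * b_offset * k. The optional bias add (-DADD_BIAS) is omitted for brevity.
+ *
+ *   #include <stdint.h>
+ *
+ *   void offset_contribution_ref(int32_t *mm_result, const int32_t *sum_col, const int32_t *sum_row,
+ *                                int rows, int cols, int32_t a_offset, int32_t b_offset,
+ *                                int32_t k_offset)
+ *   {
+ *     for (int i = 0; i < rows; ++i)
+ *       for (int x = 0; x < cols; ++x)
+ *         mm_result[i * cols + x] += sum_col[x] * a_offset + sum_row[i] * b_offset + k_offset;
+ *   }
+ */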
+
+#if defined(RESULT_OFFSET) && defined(RESULT_MULTIPLIER) && defined(RESULT_SHIFT) && \
+ defined(OUTPUT_DATA_TYPE)
+/* OpenCL kernel used to add the offset contribution after @ref CLGEMMLowpMatrixMultiplyKernel and
+ * quantize the result down to uint8.
+ *
+ * This kernel takes a final int32 accumulator value (the output of @ref
+ * CLGEMMLowpMatrixMultiplyKernel), adds to it the offset contribution of matrix A and matrix B and
+ * quantizes to uint8 through the output stage.
+ *
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time.
+ * Usually, if gemmlowp is used to accelerate a convolution layer, sum_col will not have batches
+ *
+ * The result before the output stage is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (sum_col[k] * A_OFFSET) +
+ * (sum_row[i] * B_OFFSET) +
+ * (K_OFFSET)
+ *
+ * This result is quantized down to uint8/int8 using the output stage. The output stage computes the
+ * following operations:
+ *
+ * -# Add offset terms to final result
+ * -# Multiply each entry of result by result_mult_int
+ * -# Add bias to final result (if -DADD_BIAS is passed at compile time)
+ * -# Shift the int32 accumulator by result_shift
+ * -# Clamp the value between the specified min and max bounds (if -DMIN_BOUND and/or -DMAX_BOUND
+ * are passed at compile time)
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULTIPLIER and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] mm_result_ptr Pointer to the source tensor.
+ * Supported data type: S32
+ * @param[in] mm_result_stride_x Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] mm_result_step_x mm_result_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] mm_result_stride_y Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] mm_result_step_y mm_result_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] mm_result_stride_z Stride of the source tensor in Z
+ * dimension (in bytes)
+ * @param[in] mm_result_step_z mm_result_stride_z * number of
+ * elements along Z processed per workitem(in bytes)
+ * @param[in] mm_result_offset_first_element_in_bytes The offset of the first element in
+ * the source tensor
+ * @param[in] sum_col_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases
+ * tensor. Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases
+ * tensor in X dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the biases tensor
+ * @param[out] dst_ptr                                           Pointer to the destination tensor.
+ *                                                               Supported data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in]  dst_stride_x                                      Stride of the destination tensor in
+ *                                                               X dimension (in bytes)
+ * @param[in]  dst_step_x                                        dst_stride_x * number of elements
+ *                                                               along X processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                                      Stride of the destination tensor in
+ *                                                               Y dimension (in bytes)
+ * @param[in]  dst_step_y                                        dst_stride_y * number of elements
+ *                                                               along Y processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                                      Stride of the destination tensor in
+ *                                                               Z dimension (in bytes)
+ * @param[in]  dst_step_z                                        dst_stride_z * number of elements
+ *                                                               along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in
+ * the destination tensor
+ * @param[in] result_multipliers_ptr (Optional) Pointer to the output
+ * multipliers vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_multipliers_stride_x (Optional) Stride of the output
+ * multipliers vector in X dimension (in bytes)
+ * @param[in] result_multipliers_step_x (Optional)
+ * output_multipliers_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_multipliers_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output multipliers vector
+ * @param[in] result_shifts_ptr (Optional) Pointer to the output
+ * shifts vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_shifts_stride_x (Optional) Stride of the output
+ * shifts vector in X dimension (in bytes)
+ * @param[in] result_shifts_step_x (Optional) output_shifts_stride_x *
+ * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_shifts_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output shifts vector
+ */
+__kernel void gemmlowp_offset_contribution_quantize_down(TENSOR3D_DECLARATION(mm_result)
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+ ,
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst)
+#if defined(PER_CHANNEL_QUANTIZATION)
+ ,
+ VECTOR_DECLARATION(result_multipliers),
+ VECTOR_DECLARATION(result_shifts)
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+)
+{
+ const int x = get_global_id(0) * 4;
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ // Compute offset contribution
+ int4 offset_term_s32 = offset_contribution(
+ x, y, z
+#if defined(A_OFFSET)
+ ,
+ sum_col_ptr, sum_col_stride_x, sum_col_step_x, sum_col_stride_y, sum_col_step_y,
+ sum_col_offset_first_element_in_bytes
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ sum_row_ptr, sum_row_stride_x, sum_row_step_x, sum_row_stride_y, sum_row_step_y,
+ sum_row_offset_first_element_in_bytes
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ biases_ptr, biases_stride_x, biases_step_x, biases_offset_first_element_in_bytes
+#endif // defined(ADD_BIAS)
+ );
+
+ __global uchar *mm_result_addr = mm_result_ptr + mm_result_offset_first_element_in_bytes +
+ x * sizeof(int) + y * mm_result_stride_y +
+ z * mm_result_stride_z;
+
+ int4 in_s32 = vload4(0, (__global int *)mm_result_addr);
+
+ // Add the offset terms to GEMM's result
+ in_s32 += offset_term_s32;
+
+ // -------------- OUTPUT STAGE
+
+ // Add the offset terms to GEMM's result
+ in_s32 += (int4)RESULT_OFFSET;
+
+ // Multiply by result_mult_int and shift
+#if defined(PER_CHANNEL_QUANTIZATION)
+ __global uchar *result_multipliers_addr =
+ result_multipliers_ptr + result_multipliers_offset_first_element_in_bytes + x * sizeof(int);
+ __global uchar *result_shifts_addr =
+ result_shifts_ptr + result_shifts_offset_first_element_in_bytes + x * sizeof(int);
+ int4 result_multipliers_values = vload4(0, (__global int *)result_multipliers_addr);
+ int4 result_shifts_values = vload4(0, (__global int *)result_shifts_addr);
+
+ in_s32 *= result_multipliers_values;
+ in_s32 >>= result_shifts_values;
+#else // defined(PER_CHANNEL_QUANTIZATION)
+ in_s32 *= RESULT_MULTIPLIER;
+
+ in_s32 >>= RESULT_SHIFT;
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(in_s32, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
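+
+/* A host-side C sketch of the output stage performed above on a single accumulator (acc already
+ * includes the offset contribution computed earlier in the kernel), assuming the non-per-channel
+ * path (plain RESULT_MULTIPLIER / RESULT_SHIFT) and uint8 output; the function name is
+ * illustrative. The per-channel variant only differs by indexing the multiplier and shift per
+ * output column.
+ *
+ *   #include <stdint.h>
+ *
+ *   uint8_t quantize_down_ref(int32_t acc, int32_t result_offset, int32_t multiplier, int shift,
+ *                             int32_t min_bound, int32_t max_bound)
+ *   {
+ *     int32_t v = (acc + result_offset) * multiplier;
+ *     v >>= shift;                       // arithmetic right shift, as in the kernel
+ *     if (v < 0) v = 0;                  // saturating cast to QASYMM8 ([0..255])
+ *     if (v > 255) v = 255;
+ *     if (v < min_bound) v = min_bound;  // optional -DMIN_BOUND / -DMAX_BOUND clamp
+ *     if (v > max_bound) v = max_bound;
+ *     return (uint8_t)v;
+ *   }
+ */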
+
+/* OpenCL kernel used to add the offset contribution after matrix multiplication and quantize the
+ * result down to uint8.
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), adds to
+ * it the offset contribution of matrix A and matrix B and quantizes to uint8 through the output
+ * stage.
+ *
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time.
+ * Usually, if gemmlowp is used to accelerate a convolution layer, sum_col will not have batches
+ *
+ * The result before the output stage is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (sum_col[k] * A_OFFSET) +
+ * (sum_row[i] * B_OFFSET) +
+ * (K_OFFSET)
+ *
+ * This result is quantized down to uint8/int8 using the output stage. The output stage computes the
+ * following operations:
+ *
+ * -# Compute fixed point multiplication between each entry of input by
+ * result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULTIPLIER and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] mm_result_ptr Pointer to the source tensor.
+ * Supported data type: S32
+ * @param[in] mm_result_stride_x Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] mm_result_step_x mm_result_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] mm_result_stride_y Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] mm_result_step_y mm_result_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] mm_result_stride_z Stride of the source tensor in Z
+ * dimension (in bytes)
+ * @param[in] mm_result_step_z mm_result_stride_z * number of
+ * elements along Z processed per workitem(in bytes)
+ * @param[in] mm_result_offset_first_element_in_bytes The offset of the first element in
+ * the source tensor
+ * @param[in] sum_col_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases
+ * tensor. Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases
+ * tensor in X dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the biases tensor
+ * @param[out] dst_ptr                                           Pointer to the destination tensor.
+ *                                                               Supported data type: QASYMM8
+ * @param[in]  dst_stride_x                                      Stride of the destination tensor in
+ *                                                               X dimension (in bytes)
+ * @param[in]  dst_step_x                                        dst_stride_x * number of elements
+ *                                                               along X processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                                      Stride of the destination tensor in
+ *                                                               Y dimension (in bytes)
+ * @param[in]  dst_step_y                                        dst_stride_y * number of elements
+ *                                                               along Y processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                                      Stride of the destination tensor in
+ *                                                               Z dimension (in bytes)
+ * @param[in]  dst_step_z                                        dst_stride_z * number of elements
+ *                                                               along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in
+ * the destination tensor
+ * @param[in] result_multipliers_ptr (Optional) Pointer to the output
+ * multipliers vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_multipliers_stride_x (Optional) Stride of the output
+ * multipliers vector in X dimension (in bytes)
+ * @param[in] result_multipliers_step_x (Optional)
+ * output_multipliers_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_multipliers_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output multipliers vector
+ * @param[in] result_shifts_ptr (Optional) Pointer to the output
+ * shifts vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_shifts_stride_x (Optional) Stride of the output
+ * shifts vector in X dimension (in bytes)
+ * @param[in] result_shifts_step_x (Optional) output_shifts_stride_x *
+ * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_shifts_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output shifts vector
+ */
+__kernel void
+ gemmlowp_offset_contribution_quantize_down_fixedpoint(TENSOR3D_DECLARATION(mm_result)
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+ ,
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst)
+#if defined(PER_CHANNEL_QUANTIZATION)
+ ,
+ VECTOR_DECLARATION(result_multipliers),
+ VECTOR_DECLARATION(result_shifts)
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+ )
+{
+ const int x = get_global_id(0) * 4;
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ // Compute offset contribution
+ int4 offset_term_s32 = offset_contribution(
+ x, y, z
+#if defined(A_OFFSET)
+ ,
+ sum_col_ptr, sum_col_stride_x, sum_col_step_x, sum_col_stride_y, sum_col_step_y,
+ sum_col_offset_first_element_in_bytes
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ sum_row_ptr, sum_row_stride_x, sum_row_step_x, sum_row_stride_y, sum_row_step_y,
+ sum_row_offset_first_element_in_bytes
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ biases_ptr, biases_stride_x, biases_step_x, biases_offset_first_element_in_bytes
+#endif // defined(ADD_BIAS)
+ );
+
+ __global uchar *mm_result_addr = mm_result_ptr + mm_result_offset_first_element_in_bytes +
+ x * sizeof(int) + y * mm_result_stride_y +
+ z * mm_result_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 in_s32 = vload4(0, (__global int *)mm_result_addr);
+
+ // Add the offset terms to GEMM's result
+ in_s32 += offset_term_s32;
+
+ // -------------- OUTPUT STAGE
+
+ // Multiply by result_mult_int and shift
+#if defined(PER_CHANNEL_QUANTIZATION)
+ __global uchar *result_multipliers_addr =
+ result_multipliers_ptr + result_multipliers_offset_first_element_in_bytes + x * sizeof(int);
+ __global uchar *result_shifts_addr =
+ result_shifts_ptr + result_shifts_offset_first_element_in_bytes + x * sizeof(int);
+ int4 result_multipliers_values = vload4(0, (__global int *)result_multipliers_addr);
+ int4 result_shifts_values = vload4(0, (__global int *)result_shifts_addr);
+
+ int4 in_s32_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(
+ in_s32, result_multipliers_values, result_shifts_values, 4);
+ int4 in_s32_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(
+ in_s32, result_multipliers_values, result_shifts_values, 4);
+ in_s32 = select(in_s32_shift_lt0, in_s32_shift_gt0, result_shifts_values >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+
+#if RESULT_SHIFT < 0
+ in_s32 =
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(in_s32, RESULT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
+ in_s32 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(in_s32, RESULT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
+
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+
+ // Add the offset terms to GEMM's result
+ in_s32 += (int4)RESULT_OFFSET;
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(in_s32, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
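+
+/* A simplified host-side C sketch of the fixed-point rescaling used above when RESULT_SHIFT >= 0
+ * (the ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE path): a rounding doubling high multiply by a
+ * Q31 multiplier followed by a rounding right shift. It deliberately ignores the INT32_MIN
+ * saturation corner case and the negative-value rounding nudge that the real helpers handle; the
+ * function name is illustrative.
+ *
+ *   #include <stdint.h>
+ *
+ *   int32_t rescale_ref(int32_t acc, int32_t q31_multiplier, int shift)
+ *   {
+ *     int64_t prod = (int64_t)acc * (int64_t)q31_multiplier;        // Q31 fixed-point product
+ *     int32_t high = (int32_t)((prod + (INT64_C(1) << 30)) >> 31);  // rounding doubling high mul
+ *     int32_t rounding = (shift > 0) ? (1 << (shift - 1)) : 0;      // round-to-nearest division
+ *     return (high + rounding) >> shift;                            // by 2^shift
+ *   }
+ */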
+#endif // defined(RESULT_OFFSET) && defined(RESULT_MULTIPLIER) && defined(RESULT_SHIFT) &&
+ // defined(OUTPUT_DATA_TYPE)
+
+#endif // defined(K_OFFSET)
+
+#if defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT)
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to
+ * QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value and processes it to obtain the final
+ * QASYMM8/QASYMM8_SIGNED value. The following computations will be performed by the kernel:
+ *
+ * -# Add offset terms to final result
+ * -# Multiply each entry of result by result_mult_int
+ * -# Add bias to final result (if -DADD_BIAS is passed at compile time)
+ * -# Shift the int32 accumulator by result_shift
+ * -# Clamp the value between the specified min and max bounds (if -DMIN_BOUND and/or -DMAX_BOUND
+ * are passed at compile time)
+ * -# Clamp the resulting int32 values:
+ *    - to the [0..255] range and cast to QASYMM8.
+ *    - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULT_INT and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ * @param[out] dst_ptr                              Pointer to the destination tensor. Supported
+ *                                                  data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in]  dst_stride_x                         Stride of the destination tensor in X dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_x                           dst_stride_x * number of elements along X
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                         Stride of the destination tensor in Y dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_y                           dst_stride_y * number of elements along Y
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                         Stride of the destination tensor in Z dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_z                           dst_stride_z * number of elements along Z
+ *                                                  processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Add the offset terms to GEMM's result
+ input_values += (int4)RESULT_OFFSET;
+
+ // Multiply by result_mult_int and shift
+ input_values *= RESULT_MULT_INT;
+
+#if RESULT_SHIFT < 0
+ input_values >>= -RESULT_SHIFT;
+#else // RESULT_SHIFT >= 0
+ input_values >>= RESULT_SHIFT;
+#endif // RESULT_SHIFT < 0
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(input_values, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
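+
+/* Worked example of the output stage above (all values chosen purely for illustration): with
+ * -DADD_BIAS, -DRESULT_OFFSET=2, -DRESULT_MULT_INT=3 and -DRESULT_SHIFT=4, an accumulator of 100
+ * with a bias of 20 becomes (100 + 20 + 2) * 3 = 366, then 366 >> 4 = 22, which is unchanged by the
+ * saturating cast to QASYMM8. */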
+#endif // defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT)
+
+#if defined(RESULT_OFFSET_AFTER_SHIFT) && defined(RESULT_FIXEDPOINT_MULTIPLIER) && \
+ defined(RESULT_SHIFT)
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to
+ * QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and
+ * processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. The following computations will be
+ * performed by the kernel:
+ *
+ * -# Compute fixed point multiplication between each entry of input by
+ * result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET_AFTER_SHIFT, -DRESULT_FIXEDPOINT_MULTIPLIER
+ * and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ * @param[out] dst_ptr                              Pointer to the destination tensor. Supported
+ *                                                  data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in]  dst_stride_x                         Stride of the destination tensor in X dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_x                           dst_stride_x * number of elements along X
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                         Stride of the destination tensor in Y dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_y                           dst_stride_y * number of elements along Y
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                         Stride of the destination tensor in Z dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_z                           dst_stride_z * number of elements along Z
+ *                                                  processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down_fixedpoint(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Multiply by result_mult_int and shift
+#if RESULT_SHIFT < 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
+
+ // Add the offset terms to GEMM's result
+ input_values += (int4)RESULT_OFFSET_AFTER_SHIFT;
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(input_values, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
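+
+/* For reference, RESULT_FIXEDPOINT_MULTIPLIER and RESULT_SHIFT are typically derived on the host
+ * from the real-valued rescale factor, in the spirit of the gemmlowp/TFLite helpers. A hedged C
+ * sketch (function name illustrative; zero or negative scales are not handled):
+ *
+ *   #include <math.h>
+ *   #include <stdint.h>
+ *
+ *   void quantize_multiplier_ref(double real_multiplier, int32_t *quantized_multiplier, int *shift)
+ *   {
+ *     int exponent = 0;
+ *     const double significand = frexp(real_multiplier, &exponent); // significand in [0.5, 1)
+ *     int64_t q = (int64_t)llround(significand * (double)(INT64_C(1) << 31));
+ *     if (q == (INT64_C(1) << 31)) // rounding can push the significand up to exactly 2^31
+ *     {
+ *       q /= 2;
+ *       ++exponent;
+ *     }
+ *     *quantized_multiplier = (int32_t)q;
+ *     *shift = -exponent; // positive value = right shift, matching RESULT_SHIFT >= 0 above
+ *   }
+ */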
+#endif // defined(RESULT_OFFSET_AFTER_SHIFT) && defined(RESULT_FIXEDPOINT_MULTIPLIER) &&
+ // defined(RESULT_SHIFT)
+
+#if defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT)
+
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and
+ * processes it to obtain the final QSYMM16 value. The following computations will be performed by
+ * the kernel:
+ *
+ * -# Compute fixed point multiplication between each entry of input by
+ * result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values to the [-32768..32767] range and cast to QSYMM16.
+ *
+ * @attention The scalar scale factor and number of bits to shift right of the output tensor must
+ * be passed at compile time using -DRESULT_FIXEDPOINT_MULTIPLIER and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ * @param[out] dst_ptr                              Pointer to the destination tensor. Supported
+ *                                                  data type: QSYMM16
+ * @param[in]  dst_stride_x                         Stride of the destination tensor in X dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_x                           dst_stride_x * number of elements along X
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                         Stride of the destination tensor in Y dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_y                           dst_stride_y * number of elements along Y
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                         Stride of the destination tensor in Z dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_z                           dst_stride_z * number of elements along Z
+ *                                                  processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x * 2 + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Multiply by result_mult_int and shift
+#if RESULT_SHIFT < 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
+
+ short4 res = convert_short4_sat(input_values);
+
+#if defined(MIN_BOUND)
+ res = max(res, (short4)MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (short4)MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global short *)dst_addr);
+}
+#endif // defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT)
+
+#if defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET)
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to
+ * QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and
+ * processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. The following computations will be
+ * performed by the kernel:
+ *
+ * -# Multiply each entry of the input by the real-valued scale factor REAL_MULTIPLIER
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Requantize
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset and scalar scale factor must be passed at compile time using
+ * -DOUTPUT_OFFSET and -DREAL_MULTIPLIER
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr Pointer to the biases tensor. Supported data
+ * type: same as @p src_ptr
+ * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in
+ * bytes)
+ * @param[in] biases_step_x biases_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases
+ * tensor
+ * @param[out] dst_ptr                              Pointer to the destination tensor. Supported
+ *                                                  data type: QASYMM8
+ * @param[in]  dst_stride_x                         Stride of the destination tensor in X dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_x                           dst_stride_x * number of elements along X
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                         Stride of the destination tensor in Y dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_y                           dst_stride_y * number of elements along Y
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                         Stride of the destination tensor in Z dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_z                           dst_stride_z * number of elements along Z
+ *                                                  processed per workitem(in bytes)
+ * @param[in]  dst_stride_w                         Stride of the destination tensor in W dimension
+ *                                                  (in bytes)
+ * @param[in]  dst_step_w                           dst_stride_w * number of elements along W
+ *                                                  processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down_float(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+#if defined(DST_HEIGHT)
+ TENSOR4D_DECLARATION(dst))
+#else // defined(DST_HEIGHT)
+ TENSOR3D_DECLARATION(dst))
+#endif // defined(DST_HEIGHT)
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Convert to float
+ float4 input_values_f = convert_float4(input_values);
+ input_values_f = round(input_values_f * (float)REAL_MULTIPLIER + (float)OUTPUT_OFFSET);
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(input_values_f, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
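+
+/* A host-side C sketch of the float output stage above, for a single accumulator. REAL_MULTIPLIER
+ * is typically (scale_a * scale_b) / scale_output and OUTPUT_OFFSET the output zero point; that
+ * interpretation and the function name are assumptions, while the arithmetic mirrors the kernel.
+ *
+ *   #include <math.h>
+ *   #include <stdint.h>
+ *
+ *   uint8_t quantize_down_float_ref(int32_t acc, float real_multiplier, float output_offset)
+ *   {
+ *     const int32_t q = (int32_t)roundf((float)acc * real_multiplier + output_offset);
+ *     return (uint8_t)(q < 0 ? 0 : (q > 255 ? 255 : q)); // saturating cast to QASYMM8
+ *   }
+ */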
+#endif // defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#if defined(DATA_TYPE) && defined(CONSTANT_VALUE) // Check for compile time constants
+
+/** Fill the tensor's planes with the given value
+ * @attention The following variables must be passed at compile time:
+ * -# -DDATA_TYPE = Tensor data type. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * -# -DCONSTANT_VALUE = The value used to fill the tensor's planes
+ * -# -DVEC_SIZE = Vector size
+ * -# -DLAST_ACCESSED_X = The element that is on the X border (threads trying to set this, might
+ * need to step back a bit)
+ *
+ * @param[in] tensor_ptr Pointer to the source image. Data types
+ * supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] tensor_stride_x Stride of the source image in X dimension (in
+ * bytes)
+ * @param[in] tensor_step_x tensor_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] tensor_stride_y Stride of the source image in Y dimension (in
+ * bytes)
+ * @param[in] tensor_step_y tensor_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] tensor_offset_first_element_in_bytes The offset of the first element in the source
+ * image
+ * @note The fill value is not a kernel argument; it is provided at compile time via -DCONSTANT_VALUE
+ */
+__kernel void memset(TENSOR3D_DECLARATION(tensor))
+{
+ Tensor3D tensor = CONVERT_TO_TENSOR3D_STRUCT(tensor);
+
+#if defined(VEC_SIZE)
+
+#if defined(LAST_ACCESSED_X)
+ // Check if access on width gets out of bounds
+ // If it does shift access vector to access elements within bounds
+ const int xi = (int)(get_global_id(0) * VEC_SIZE);
+ tensor.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * tensor_stride_x;
+#endif // defined(LAST_ACCESSED_X)
+
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ data = (DATA_TYPE)(CONSTANT_VALUE);
+
+ VSTORE(VEC_SIZE)
+ (data, 0, (__global DATA_TYPE *)tensor.ptr);
+#else // !defined(VEC_SIZE)
+ *((__global DATA_TYPE *)(tensor.ptr)) = (DATA_TYPE)(CONSTANT_VALUE);
+#endif // defined(VEC_SIZE)
+}
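+
+/* Illustrative host-side usage: the fill value is not a kernel argument, everything is baked in
+ * through build options. The option values below and the pre-existing `program`/`device` handles
+ * are examples only.
+ *
+ *   const char *options =
+ *     "-DDATA_TYPE=float -DCONSTANT_VALUE=0.0f -DVEC_SIZE=4 -DLAST_ACCESSED_X=12";
+ *   cl_int err = clBuildProgram(program, 1, &device, options, NULL, NULL);
+ */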
+
+#endif // Check for compile time constants
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#if defined(DATA_TYPE) && defined(SELECT_DT) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) && \
+ defined(SRC_WIDTH)
+
+#define VEC_TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
+#define VEC_SELECT VEC_DATA_TYPE(SELECT_DT, VEC_SIZE)
+#define OFFSETS VEC_OFFS(VEC_SELECT, VEC_SIZE)
+
+#if defined(CONST_VAL)
+/** Perform a pad operation when PaddingMode is CONSTANT
+ *
+ * @note Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
+ * @note Vector size must be passed using the -DVEC_SIZE compile flag, e.g. -DVEC_SIZE=4
+ * @note Constant value used to fill the pads must be passed using the -DCONST_VAL compile flag,
+ * e.g. -DCONST_VAL=1.27
+ * @note Pad to add to the left must be passed using the -DPAD_X_BEFORE compile flag, e.g.
+ * -DPAD_X_BEFORE=5
+ * @note Input tensor's width must be passed using the -DSRC_WIDTH compile flag, e.g.
+ * -DSRC_WIDTH=224
+ * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile
+ * flag, e.g. -DSELECT_DT=float
+ * @note In case pad left is more than the vector size, the number of threads to skip along the X
+ * axis must be passed using the -DNUM_THREADS_TO_SKIP_X compile flag, e.g.
+ * -DNUM_THREADS_TO_SKIP_X=1. This is defined as (PAD_X_BEFORE / VEC_SIZE)
+ * @note If pad also needs to be added to the top of the tensor, the following compile flags must be
+ * passed at compile time:
+ * -# -DPAD_Y_BEFORE: Pad to add to the top of the input tensor (e.g. -DPAD_Y_BEFORE=3)
+ * -# -DSRC_HEIGHT: Input tensor's height (e.g. -DSRC_HEIGHT=127)
+ * @note If pad also needs to be added to the depth of the tensor, the following compile flags must
+ * be passed at compile time:
+ * -# -DPAD_Z_BEFORE: Pad to add before the first plane of the input tensor (e.g.
+ * -DPAD_Z_BEFORE=3)
+ * -# -DSRC_DEPTH: Input tensor's depth (e.g. -DSRC_DEPTH=32)
+ * @note If pad also needs to be added to the batch of the tensor, the following compile flags must
+ * be passed at compile time:
+ * -# -DPAD_W_BEFORE: Pad to add before the first batch of the input tensor (e.g.
+ * -DPAD_W_BEFORE=3)
+ * -# -DSRC_BATCH: Input tensor's batch size (e.g. -DSRC_BATCH=4)
+ *
+ * @param[in] src_ptr Pointer to the source image. Supported data types:
+ * U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32
+ * @param[in] src_stride_x Stride of the source image in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source image in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source image in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] dst_ptr Pointer to the destination image. Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination image in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination image in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination image in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * image
+ * @param[in] batch (Optional) Batch index if 4D pad must be applied
+ */
+__kernel void pad_layer_constant(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst)
+#if defined(PAD_W_BEFORE)
+ ,
+ uint batch
+#endif // defined(PAD_W_BEFORE)
+)
+{
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ uint cond = 0;
+
+#if defined(PAD_W_BEFORE)
+ cond |= batch < PAD_W_BEFORE || batch >= (SRC_BATCH + PAD_W_BEFORE);
+#endif // defined(PAD_W_BEFORE)
+#if defined(PAD_Z_BEFORE)
+ cond |= z < PAD_Z_BEFORE || z >= (SRC_DEPTH + PAD_Z_BEFORE);
+#endif // defined(PAD_Z_BEFORE)
+
+ if (cond)
+ {
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+ VSTORE(VEC_SIZE)
+ ((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr);
+ }
+ else
+ {
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+#if defined(NUM_THREADS_TO_SKIP_X)
+ /* In case the pad left is greater than the vector size, and we are past the threads operating
+ * solely on pad values, the input pointer must be brought back along the X axis to start from
+ * the first non-pad values.
+ *
+ * E.g. with VEC_SIZE=2, PAD_X_BEFORE=5, CONST_VAL=0 and 1D input |1 2 3 4 5 6|:
+ * -# The first thread will compute the output values |0 0| since it detects (x_outs == (0, 1))
+ * < PAD_X_BEFORE
+ * -# The second thread will compute the output values |0 0| since it detects (x_outs == (2,
+ * 3)) < PAD_X_BEFORE
+ * -# The third thread should compute |0 1|, however the input pointer is now ahead of ((x *
+ * VEC_SIZE) == 4) values, reading |4 5|
+   * -# To detect this, we use ((PAD_X_BEFORE / VEC_SIZE) == NUM_THREADS_TO_SKIP_X == 2) and
+   * check that the current x is greater than or equal to it
+   * -# So, we bring the pointer back by NUM_THREADS_TO_SKIP_X threads, which means multiplying
+   * this constant by the input's step along the X axis
+   * -# Now that the pointer is back by ((NUM_THREADS_TO_SKIP_X * src_step_x) == 4) values, it
+   * will read the desired values |0 1|
+ */
+ src.ptr -= select(0u, NUM_THREADS_TO_SKIP_X * src_step_x, x >= NUM_THREADS_TO_SKIP_X);
+#endif // defined(NUM_THREADS_TO_SKIP_X)
+#if defined(PAD_Z_BEFORE)
+ src.ptr -= PAD_Z_BEFORE * src_step_z;
+#endif // defined(PAD_Z_BEFORE)
+#if defined(PAD_W_BEFORE)
+ src.ptr -= PAD_W_BEFORE * SRC_DEPTH * src_step_z;
+#endif // defined(PAD_W_BEFORE)
+
+ VEC_TYPE src_vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
+
+ VEC_INT xs_out = (VEC_INT)(x * VEC_SIZE) + CONVERT(OFFSETS, VEC_INT);
+ VEC_INT cond = xs_out < (VEC_INT)PAD_X_BEFORE || xs_out >= (VEC_INT)(SRC_WIDTH + PAD_X_BEFORE);
+#if defined(PAD_Y_BEFORE)
+ cond |=
+ (VEC_INT)y < (VEC_INT)PAD_Y_BEFORE || (VEC_INT)y >= (VEC_INT)(SRC_HEIGHT + PAD_Y_BEFORE);
+#endif // defined(PAD_Y_BEFORE)
+ VSTORE(VEC_SIZE)
+ (select(src_vals, (VEC_TYPE)CONST_VAL, CONVERT(cond, VEC_SELECT)), 0,
+ (__global DATA_TYPE *)dst.ptr);
+ }
+}
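+
+/* Illustrative example, assuming a float input with SRC_WIDTH=224, PAD_X_BEFORE=5 and VEC_SIZE=4:
+ * the host would build this kernel with flags along the lines of
+ *
+ *   -DDATA_TYPE=float -DSELECT_DT=int -DVEC_SIZE=4 -DCONST_VAL=0.0f
+ *   -DPAD_X_BEFORE=5 -DSRC_WIDTH=224 -DNUM_THREADS_TO_SKIP_X=1
+ *
+ * where NUM_THREADS_TO_SKIP_X = PAD_X_BEFORE / VEC_SIZE = 5 / 4 = 1, i.e. one whole work-item
+ * writes nothing but CONST_VAL before the first input element is needed.
+ */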
+#endif // defined(CONST_VAL)
+
+#if defined(PAD_X_BEFORE_REMAINDER) && defined(PAD_X_AFTER_REMAINDER) && \
+ defined(PAD_X_BEFORE_REMAINDER_REFL) && defined(PAD_X_AFTER_REMAINDER_REFL) && \
+ defined(AFTER_PAD_FACT_X)
+
+#define SCALAR_COND(x) (VEC_SELECT) x == (VEC_SELECT)1
+#define ROTATE_REVERSE(x, n) ROTATE(REVERSE(x, VEC_SIZE), VEC_SIZE, n)
+#define SYMM_REFL_LEFT(x, n0, n1) \
+ select(ROTATE_REVERSE(x, n1), ROTATE(x, VEC_SIZE, n0), OFFSETS >= (VEC_SELECT)n0)
+#define SYMM_REFL_RIGHT(x, n0, n1) \
+ select(ROTATE(x, VEC_SIZE, n0), ROTATE_REVERSE(x, n1), OFFSETS >= (VEC_SELECT)n0)
+
+/** Perform a pad operation when PaddingMode is SYMMETRIC
+ *
+ * @note Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
+ * @note Vector size must be passed using the -DVEC_SIZE compile flag, e.g. -DVEC_SIZE=4
+ * @note Constant value must be passed using the -DCONST_VAL compile flag, e.g. -DCONST_VAL=1.27
+ * @note Pad to add to the left must be passed using the -DPAD_X_BEFORE compile flag, e.g.
+ * -DPAD_X_BEFORE=5
+ * @note Input tensor's width must be passed using the -DSRC_WIDTH compile flag, e.g.
+ * -DSRC_WIDTH=224
+ * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile
+ * flag, e.g. -DSELECT_DT=float
+ * @note Number of values to the left when operating across left padding must be passed using the
+ * -DPAD_X_BEFORE_REMAINDER compile flag, e.g. -DPAD_X_BEFORE_REMAINDER=5
+ * @note Number of values to the left when operating across right padding must be passed using the
+ * -DPAD_X_AFTER_REMAINDER compile flag, e.g. -DPAD_X_AFTER_REMAINDER=6
+ * @note To rearrange the vectors properly, (PAD_X_BEFORE_REMAINDER + 1) must be passed when mode
+ * is REFLECT using the -DPAD_X_BEFORE_REMAINDER_REFL compile flag, e.g.
+ * -DPAD_X_BEFORE_REMAINDER_REFL=6
+ * @note To rearrange the vectors properly, (PAD_X_AFTER_REMAINDER - 1) must be passed using the
+ * -DPAD_X_AFTER_REMAINDER_REFL compile flag, e.g. -DPAD_X_AFTER_REMAINDER_REFL=5
+ * @note When after pad X, starting point to read backward from must be passed using the
+ * -DAFTER_PAD_FACT_X compile flag, e.g. -DAFTER_PAD_FACT_X=253
+ * @note If padding mode is REFLECT, the -DIS_REFLECT compile flag must be set to 1, else it must be
+ * set to 0
+ * @note If pad also needs to be added to the top of the tensor, the following compile flags must be
+ * passed at compile time:
+ * -# -DPAD_Y_BEFORE: Pad to add to the top of the input tensor (e.g. -DPAD_Y_BEFORE=3)
+ * -# -DSRC_HEIGHT: Input tensor's height (e.g. -DSRC_HEIGHT=127)
+ * @note If pad also needs to be added to the depth of the tensor, the following compile flags must
+ * be passed at compile time:
+ * -# -DPAD_Z_BEFORE: Pad to add before the first plane of the input tensor (e.g.
+ * -DPAD_Z_BEFORE=3)
+ * -# -DSRC_DEPTH: Input tensor's depth (e.g. -DSRC_DEPTH=32)
+ * @note If the starting point to read backward from is less than the output's last element accessed
+ * in the X, the following compile flags must be passed at compile time to avoid negative offsets:
+ * -# -DAFTER_PAD_REM: Defines how much to rotate the vector if the backward calculation
+ * attempted to read from a negative offset (e.g. -DAFTER_PAD_REM=3)
+ *
+ * @param[in] src_ptr Pointer to the source image. Supported data types:
+ * U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32
+ * @param[in] src_stride_x Stride of the source image in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source image in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source image in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] dst_ptr Pointer to the destination image. Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination image in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination image in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination image in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * image
+ */
+__kernel void pad_layer_symmetric_reflect(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Get current thread position
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ // Define conditions based on the thread X position w.r.t. pad left and right
+ const int x_out_first = x * VEC_SIZE;
+ const int x_out_last = x_out_first + VEC_SIZE;
+ const int is_before_pad_left = (x_out_last <= PAD_X_BEFORE);
+ const int is_across_pad_left = (x_out_first < PAD_X_BEFORE) && (x_out_last > PAD_X_BEFORE);
+ const int is_inside_input =
+ (x_out_first >= PAD_X_BEFORE) && (x_out_last <= (SRC_WIDTH + PAD_X_BEFORE));
+ const int is_across_pad_right =
+ (x_out_first < (SRC_WIDTH + PAD_X_BEFORE)) && (x_out_last > (SRC_WIDTH + PAD_X_BEFORE));
+ const int is_after_pad_right = (x_out_first >= (SRC_WIDTH + PAD_X_BEFORE));
+
+ // Calculate base pointers
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes;
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ // Calculate input tensor's offset based on the defined conditions
+ int x_offset = 0;
+ x_offset = select(x_offset, PAD_X_BEFORE - x_out_last + IS_REFLECT, is_before_pad_left);
+ x_offset = select(x_offset, x_out_first - PAD_X_BEFORE, is_inside_input);
+ x_offset = select(x_offset, SRC_WIDTH - VEC_SIZE, is_across_pad_right);
+ x_offset = select(x_offset, AFTER_PAD_FACT_X - x_out_last, is_after_pad_right);
+
+#if defined(AFTER_PAD_REM)
+ int neg_offs = x_offset < 0;
+ x_offset = max(x_offset, 0);
+#endif // defined(AFTER_PAD_REM)
+
+ // Load input values from the computed offset
+ int y_in = y;
+ int z_in = z;
+#if defined(PAD_Y_BEFORE)
+ y_in = select(y - PAD_Y_BEFORE, PAD_Y_BEFORE - y + IS_REFLECT - 1, y < PAD_Y_BEFORE);
+ y_in = select(y_in, 2 * SRC_HEIGHT + PAD_Y_BEFORE - y - IS_REFLECT - 1,
+ y >= (SRC_HEIGHT + PAD_Y_BEFORE));
+#endif // defined(PAD_Y_BEFORE)
+#if defined(PAD_Z_BEFORE)
+ z_in = select(z - PAD_Z_BEFORE, PAD_Z_BEFORE - z + IS_REFLECT - 1, z < PAD_Z_BEFORE);
+ z_in = select(z_in, 2 * SRC_DEPTH + PAD_Z_BEFORE - z - IS_REFLECT - 1,
+ z >= (SRC_DEPTH + PAD_Z_BEFORE));
+#endif // defined(PAD_Z_BEFORE)
+
+ src_addr += x_offset * src_stride_x + y_in * src_step_y + z_in * src_step_z;
+
+#if SRC_WIDTH == 1
+ VSTORE(VEC_SIZE)
+ ((VEC_TYPE)(*(__global DATA_TYPE *)src_addr), 0, (__global DATA_TYPE *)dst.ptr);
+#else // SRC_WIDTH == 1
+
+ VEC_TYPE src_vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src_addr);
+
+ // Choose rearrangement policy based on the defined conditions
+ src_vals =
+ select(src_vals, SYMM_REFL_LEFT(src_vals, PAD_X_BEFORE_REMAINDER, PAD_X_BEFORE_REMAINDER_REFL),
+ SCALAR_COND(is_across_pad_left));
+ src_vals =
+ select(src_vals, SYMM_REFL_RIGHT(src_vals, PAD_X_AFTER_REMAINDER, PAD_X_AFTER_REMAINDER_REFL),
+ SCALAR_COND(is_across_pad_right));
+ src_vals = select(src_vals, REVERSE(src_vals, VEC_SIZE),
+ SCALAR_COND((is_before_pad_left || is_after_pad_right)));
+#if defined(AFTER_PAD_REM)
+ src_vals = select(src_vals, ROTATE(src_vals, VEC_SIZE, AFTER_PAD_REM), SCALAR_COND(neg_offs));
+#endif // defined(AFTER_PAD_REM)
+
+ // Store
+ VSTORE(VEC_SIZE)
+ (src_vals, 0, (__global DATA_TYPE *)dst.ptr);
+#endif // SRC_WIDTH == 1
+}
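+
+/* Illustrative example, assuming PAD_X_BEFORE=5, SRC_WIDTH=224, VEC_SIZE=4 and REFLECT mode
+ * (IS_REFLECT=1): the host-side configure() later in this patch derives
+ *
+ *   PAD_X_BEFORE_REMAINDER      = 5 % 4           = 1
+ *   PAD_X_AFTER_REMAINDER       = (224 + 5) % 4   = 1
+ *   PAD_X_BEFORE_REMAINDER_REFL = (1 + 1) % 4     = 2
+ *   PAD_X_AFTER_REMAINDER_REFL  = (1 - 1) % 4     = 0
+ *   AFTER_PAD_FACT_X            = 2 * 224 + 5 - 1 = 452
+ */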
+#endif // defined(PAD_X_BEFORE_REMAINDER) && defined(PAD_X_AFTER_REMAINDER) &&
+ // defined(PAD_X_BEFORE_REMAINDER_REFL) && defined(PAD_X_AFTER_REMAINDER_REFL) &&
+ // defined(AFTER_PAD_FACT_X)
+#endif // defined(DATA_TYPE) && defined(SELECT_DT) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) &&
+ // defined(SRC_WIDTH)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_REPEAT_H
+#define ARM_COMPUTE_REPEAT_H
+
+#include "helpers.h"
+
+/** Macros that help in loop unrolling */
+// Repeat macros with 3 params, excluding the implicit ID param
+#define REPEAT_3_1(P_X, P_A, P_B, P_C) P_X##_DEF(0, P_A, P_B, P_C)
+#define REPEAT_3_2(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(1, P_A, P_B, P_C); \
+ REPEAT_3_1(P_X, P_A, P_B, P_C)
+#define REPEAT_3_3(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(2, P_A, P_B, P_C); \
+ REPEAT_3_2(P_X, P_A, P_B, P_C)
+#define REPEAT_3_4(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(3, P_A, P_B, P_C); \
+ REPEAT_3_3(P_X, P_A, P_B, P_C)
+#define REPEAT_3_5(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(4, P_A, P_B, P_C); \
+ REPEAT_3_4(P_X, P_A, P_B, P_C)
+#define REPEAT_3_6(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(5, P_A, P_B, P_C); \
+ REPEAT_3_5(P_X, P_A, P_B, P_C)
+#define REPEAT_3_7(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(6, P_A, P_B, P_C); \
+ REPEAT_3_6(P_X, P_A, P_B, P_C)
+#define REPEAT_3_8(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(7, P_A, P_B, P_C); \
+ REPEAT_3_7(P_X, P_A, P_B, P_C)
+#define REPEAT_3_9(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(8, P_A, P_B, P_C); \
+ REPEAT_3_8(P_X, P_A, P_B, P_C)
+#define REPEAT_3_10(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(9, P_A, P_B, P_C); \
+ REPEAT_3_9(P_X, P_A, P_B, P_C)
+#define REPEAT_3_11(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(A, P_A, P_B, P_C); \
+ REPEAT_3_10(P_X, P_A, P_B, P_C)
+#define REPEAT_3_12(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(B, P_A, P_B, P_C); \
+ REPEAT_3_11(P_X, P_A, P_B, P_C)
+#define REPEAT_3_13(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(C, P_A, P_B, P_C); \
+ REPEAT_3_12(P_X, P_A, P_B, P_C)
+#define REPEAT_3_14(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(D, P_A, P_B, P_C); \
+ REPEAT_3_13(P_X, P_A, P_B, P_C)
+#define REPEAT_3_15(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(E, P_A, P_B, P_C); \
+ REPEAT_3_14(P_X, P_A, P_B, P_C)
+#define REPEAT_3_16(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(F, P_A, P_B, P_C); \
+ REPEAT_3_15(P_X, P_A, P_B, P_C)
+
+#define REPEAT_DEF_3_N(P_NUM, P_OP, P_A, P_B, P_C) \
+ REPEAT_3_##P_NUM(P_OP, P_A, P_B, P_C) // One level of indirection to ensure order of expansion
+ // does not affect preprocessing P_NUM
+#define REPEAT_3_N(P_NUM, P_OP, P_A, P_B, P_C) REPEAT_DEF_3_N(P_NUM, P_OP, P_A, P_B, P_C)
+
+// Repeat macros with 4 params, excluding the implicit ID param
+#define REPEAT_4_1(P_X, P_A, P_B, P_C, P_D) P_X##_DEF(0, P_A, P_B, P_C, P_D)
+#define REPEAT_4_2(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(1, P_A, P_B, P_C, P_D); \
+ REPEAT_4_1(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_3(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(2, P_A, P_B, P_C, P_D); \
+ REPEAT_4_2(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_4(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(3, P_A, P_B, P_C, P_D); \
+ REPEAT_4_3(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_5(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(4, P_A, P_B, P_C, P_D); \
+ REPEAT_4_4(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_6(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(5, P_A, P_B, P_C, P_D); \
+ REPEAT_4_5(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_7(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(6, P_A, P_B, P_C, P_D); \
+ REPEAT_4_6(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_8(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(7, P_A, P_B, P_C, P_D); \
+ REPEAT_4_7(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_9(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(8, P_A, P_B, P_C, P_D); \
+ REPEAT_4_8(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_10(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(9, P_A, P_B, P_C, P_D); \
+ REPEAT_4_9(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_11(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(A, P_A, P_B, P_C, P_D); \
+ REPEAT_4_10(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_12(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(B, P_A, P_B, P_C, P_D); \
+ REPEAT_4_11(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_13(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(C, P_A, P_B, P_C, P_D); \
+ REPEAT_4_12(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_14(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(D, P_A, P_B, P_C, P_D); \
+ REPEAT_4_13(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_15(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(E, P_A, P_B, P_C, P_D); \
+ REPEAT_4_14(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_16(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(F, P_A, P_B, P_C, P_D); \
+ REPEAT_4_15(P_X, P_A, P_B, P_C, P_D)
+
+#define REPEAT_DEF_4_N(P_NUM, P_OP, P_A, P_B, P_C, P_D) \
+ REPEAT_4_##P_NUM(P_OP, P_A, P_B, P_C, P_D) // One level of indirection to ensure order of
+ // expansion does not affect preprocessing P_NUM
+#define REPEAT_4_N(P_NUM, P_OP, P_A, P_B, P_C, P_D) REPEAT_DEF_4_N(P_NUM, P_OP, P_A, P_B, P_C, P_D)
+
+// Macro for initializing N variables. Generates N statements that define VAR##N =
+// RHS_ACCESSOR_DEF(...)
+#define VAR_INIT_TO_CONST_DEF(ID, TYPE, VAR, VAL) TYPE VAR##ID = VAL
+#define REPEAT_VAR_INIT_TO_CONST(N, TYPE, VAR, VAL) REPEAT_3_N(N, VAR_INIT_TO_CONST, TYPE, VAR, VAL)
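+
+// Illustrative expansion, assuming the caller names below:
+//   REPEAT_VAR_INIT_TO_CONST(3, int4, acc, 0)
+// goes through REPEAT_3_N -> REPEAT_3_3 and produces the three statements
+//   int4 acc2 = 0; int4 acc1 = 0; int4 acc0 = 0;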
+
+// Macro for initializing N variables by converting the data type. Generates N statements that
+// define VAR##N = RHS_ACCESSOR_DEF(...)
+#define VAR_INIT_CONVERT_SAT_DEF(ID, TYPE_OUT, VAR_IN, VAR_OUT) \
+ TYPE_OUT VAR_OUT##ID = CONVERT_SAT(VAR_IN##ID, TYPE_OUT)
+#define REPEAT_VAR_INIT_CONVERT_SAT(N, TYPE_OUT, VAR_IN, VAR_OUT) \
+ REPEAT_3_N(N, VAR_INIT_CONVERT_SAT, TYPE_OUT, VAR_IN, VAR_OUT)
+
+// Macro for adding a constant to N variables. Generates N statements that define VAR##N
+// = RHS_ACCESSOR_DEF(...)
+#define ADD_CONST_TO_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID += (TYPE)VAL
+#define REPEAT_ADD_CONST_TO_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, ADD_CONST_TO_VAR, TYPE, VAR, VAL)
+
+// Macro for multiplying N variables (VAR_B) by a constant (VAL) and adding to other N variables
+// (VAR_A). Generates N statements that define VAR_A##N = RHS_ACCESSOR_DEF(...)
+#define MLA_VAR_WITH_CONST_VEC_DEF(ID, VAR_A, VAR_B, VAL) VAR_A##ID += VAR_B##ID * VAL
+#define REPEAT_MLA_VAR_WITH_CONST_VEC(N, VAR_A, VAR_B, VAL) \
+ REPEAT_3_N(N, MLA_VAR_WITH_CONST_VEC, VAR_A, VAR_B, VAL)
+
+// Macro for adding a vector to N variables. Generates N statements that define VAR##N
+// = RHS_ACCESSOR_DEF(...)
+#define ADD_VECTOR_TO_VAR_DEF(ID, TYPE, VAR, VEC) VAR##ID += VEC
+#define REPEAT_ADD_VECTOR_TO_VAR(N, VAR, VEC) REPEAT_3_N(N, ADD_VECTOR_TO_VAR, "", VAR, VEC)
+
+// Macro for adding two N variables. Generates N statements that define VAR_A##N
+// = RHS_ACCESSOR_DEF(...)
+#define ADD_TWO_VARS_DEF(ID, TYPE, VAR_A, VAR_B) VAR_A##ID += VAR_B##ID
+#define REPEAT_ADD_TWO_VARS(N, VAR_A, VAR_B) REPEAT_3_N(N, ADD_TWO_VARS, "", VAR_A, VAR_B)
+
+// Macro for performing Max between a constant and N variables. Generates N statements that
+// define VAR##N = RHS_ACCESSOR_DEF(...)
+#define MAX_CONST_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID = max(VAR##ID, (TYPE)VAL)
+#define REPEAT_MAX_CONST_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, MAX_CONST_VAR, TYPE, VAR, VAL)
+
+// Macro for performing Min between a constant and N variables. Generates N statements that
+// define VAR##N = RHS_ACCESSOR_DEF(...)
+#define MIN_CONST_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID = min(VAR##ID, (TYPE)VAL)
+#define REPEAT_MIN_CONST_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, MIN_CONST_VAR, TYPE, VAR, VAL)
+
+// Macro for applying ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE to N variables. Generates N
+// statements that define VAR##N = RHS_ACCESSOR_DEF(...)
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE_DEF(ID, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ VAR##ID = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, SIZE)
+#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(N, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ REPEAT_4_N(N, ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE, SIZE, VAR, RES_MUL, RES_SHIFT)
+
+// Macro for applying ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE to N variables. Generates N
+// statements that define VAR##N = RHS_ACCESSOR_DEF(...)
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE_DEF(ID, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ VAR##ID = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, SIZE)
+#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(N, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ REPEAT_4_N(N, ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE, SIZE, VAR, RES_MUL, RES_SHIFT)
+
+// Macro for applying per-channel ASYMM_MULT_BY_QUANT_MULTIPLIER to N variables.
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL_DEF(ID, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ ({ \
+ VEC_DATA_TYPE(int, N0) \
+ VAR##ID_shift_lt0 = \
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, N0); \
+ VEC_DATA_TYPE(int, N0) \
+ VAR##ID_shift_gt0 = \
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, N0); \
+ VAR##ID = select(VAR##ID_shift_lt0, VAR##ID_shift_gt0, RES_SHIFT >= 0); \
+ })
+#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL(N, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ REPEAT_4_N(N, ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL, SIZE, VAR, RES_MUL, RES_SHIFT)
+
+#endif // ARM_COMPUTE_REPEAT_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+/** Perform tensor reshape
+ *
+ * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
+ * -DDATA_TYPE=short
+ *
+ * @param[in] input_ptr Pointer to the first source tensor. Supported
+ * data types: All
+ * @param[in] input_stride_x Stride of the first source tensor in X dimension
+ * (in bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the first source tensor in Y dimension
+ * (in bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_z Stride of the first source tensor in Z dimension
+ * (in bytes)
+ * @param[in] input_step_z input_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first
+ * source tensor
+ * @param[out] output_ptr Pointer to the destination tensor. Supported
+ * data types: same as @p input_ptr
+ * @param[in] output_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ * @param[in] input_shape Input spatial shape
+ * @param[in] output_shape Output spatial shape
+ */
+__kernel void reshape_layer(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output),
+ int2 input_shape, int2 output_shape)
+{
+ Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input);
+ Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(output);
+
+ int3 id = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));
+
+ // Linearize index
+ int linear_idx = id.x + id.y * input_shape.x + id.z * input_shape.x * input_shape.y;
+
+ // Translate to output
+ int3 out_id;
+ out_id.x = linear_idx % output_shape.x;
+ out_id.y = (linear_idx / output_shape.x) % output_shape.y;
+ out_id.z = linear_idx / (output_shape.x * output_shape.y);
+
+ // Store result
+ *((__global DATA_TYPE *)tensor3D_offset(&out, out_id.x, out_id.y, out_id.z)) =
+ *((__global DATA_TYPE *)in.ptr);
+}
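+
+/* Illustrative example, assuming a 4x2 input plane reshaped into 2x4: the work-item at
+ * id = (3, 1, 0) linearizes to 3 + 1 * 4 = 7, which maps to out_id = (7 % 2, (7 / 2) % 4, 7 / 8)
+ * = (1, 3, 0), i.e. the element keeps its position in the flattened order.
+ */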
*/
#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/AccessWindowStatic.h"
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
#include <cstddef>
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/core/UtilsEx.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
_hits = hits;
// Make _lookup_indices tensor
- _lookup_indices = support::cpp14::make_unique<CLTensor>();
+ _lookup_indices = std::make_unique<CLTensor>();
_lookup_indices->allocator()->init(
TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
_lookup_indices->allocator()->allocate();
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
#include "support/ToolchainSupport.h"
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+CLMemsetKernel::CLMemsetKernel() : ICLKernel(), _tensor(nullptr), _full_window() {}
+
+void CLMemsetKernel::configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), tensor, constant_value, window);
+}
+
+void CLMemsetKernel::configure(const CLCompileContext &compile_context, ICLTensor *tensor,
+ const PixelValue &constant_value, Window *window)
+{
+ ARM_COMPUTE_UNUSED(compile_context);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(tensor->info(), constant_value, window));
+
+ _tensor = tensor;
+
+ const DataType data_type = tensor->info()->data_type();
+ const int vec_size_x = 16 / tensor->info()->element_size();
+
+ // Create and update the window (if needed)
+ _full_window = calculate_max_window(*tensor->info());
+ Window win = _full_window;
+ if (window != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(win, *window);
+ win = *window;
+ }
+
+ const int output_width_x = win.num_iterations(0);
+ const bool multi_access_x = output_width_x >= vec_size_x;
+ const bool remainder_x = output_width_x % vec_size_x > 0;
+
+ if (multi_access_x)
+ {
+ win.set(
+ Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
+ }
+ ICLKernel::configure_internal(win);
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+ build_opts.add_option("-DCONSTANT_VALUE=" + string_from_pixel_value(constant_value, data_type));
+ build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option_if(multi_access_x && remainder_x,
+ "-DLAST_ACCESSED_X=" + support::cpp11::to_string(
+ std::max<int>(output_width_x - vec_size_x, 0)));
+
+ _kernel =
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("memset", build_opts.options()));
+}
+
+Status CLMemsetKernel::validate(const ITensorInfo *tensor, const PixelValue &constant_value,
+ Window *window)
+{
+ ARM_COMPUTE_UNUSED(tensor);
+ ARM_COMPUTE_UNUSED(constant_value);
+ if (window != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(window->x().step() != 1);
+ }
+ return Status{};
+}
+
+void CLMemsetKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+  // Collapse all the batches on the third dimension
+ Window collapsed = window.collapse_if_possible(_full_window, Window::DimZ);
+ Window slice = collapsed.first_slice_window_3D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _tensor, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ } while (collapsed.slide_window_slice_3D(slice));
+}
+} // namespace arm_compute
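+
+// Minimal usage sketch, assuming an already allocated CLTensor `tensor`:
+//
+//   CLMemsetKernel memset_kernel;
+//   memset_kernel.configure(&tensor, PixelValue(0.f), nullptr); // fill the whole tensor with 0
+//   CLScheduler::get().enqueue(memset_kernel);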
#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
#include <string>
namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_UNUSED(constant_value);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > input->num_dimensions());
+ if (mode == PaddingMode::REFLECT || mode == PaddingMode::SYMMETRIC)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > 3);
+
+ const auto is_reflect = static_cast<unsigned int>(mode == PaddingMode::REFLECT);
+ for (size_t i = 0; i < padding.size(); ++i)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.at(i).first > (input->dimension(i) - is_reflect));
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.at(i).second > (input->dimension(i) - is_reflect));
+ }
+ }
+
+ if (output->total_size() > 0)
+ {
+ TensorShape padded_shape =
+ misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(output, input);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), padded_shape);
+ }
+
+ return Status{};
+}
+
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode,
+ unsigned int &num_elems_processed_per_iteration)
+{
+ ARM_COMPUTE_UNUSED(constant_value, mode);
+
+ const TensorShape padded_shape =
+ misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding);
+ auto_init_if_empty(*output, input->clone()->set_tensor_shape(padded_shape));
+
+ num_elems_processed_per_iteration =
+ std::min(16U, 32U / static_cast<unsigned int>(element_size_from_data_type(input->data_type())));
+ if (input->dimension(0) < num_elems_processed_per_iteration)
+ {
+ num_elems_processed_per_iteration =
+ 1 << static_cast<unsigned int>(std::log2(input->dimension(0)));
+ }
+
+ // Configure kernel window
+ Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+
+ const int input_start_x =
+ mode == PaddingMode::CONSTANT ? -(padding.at(0).first % num_elems_processed_per_iteration) : 0;
+ const int input_start_y =
+ (mode == PaddingMode::CONSTANT && padding.size() > 1) ? -padding.at(1).first : 0;
+
+ AccessWindowRectangle input_access(input, input_start_x, input_start_y,
+ num_elems_processed_per_iteration, 1);
+ AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+
+ const bool window_changed = update_window_and_padding(win, input_access, output_access);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
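+
+/* Illustrative example, assuming an F32 input: num_elems_processed_per_iteration starts at
+ * min(16, 32 / 4) = 8; if the input width is only 5 elements, it is then clamped to the largest
+ * power of two not exceeding it, 1 << (unsigned)std::log2(5) = 4.
+ */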
+} // namespace
+
+CLPadLayerKernelEx::CLPadLayerKernelEx()
+ : _input(nullptr), _output(nullptr), _input_start_x(0), _input_start_y(0), _4d_enabled(false)
+{
+}
+
+void CLPadLayerKernelEx::configure(const ICLTensor *input, ICLTensor *output,
+ const PaddingList &padding, PixelValue constant_value,
+ PaddingMode mode)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value,
+ mode);
+}
+
+void CLPadLayerKernelEx::configure(const CLCompileContext &compile_context, const ICLTensor *input,
+ ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ ARM_COMPUTE_UNUSED(compile_context);
+ // Perform validation step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), output->info(), padding, constant_value, mode));
+
+ _input = input;
+ _output = output;
+ _4d_enabled = (mode == PaddingMode::CONSTANT) && (padding.size() > 3);
+
+ // Configure window
+ unsigned int vec_size;
+ auto win_config = validate_and_configure_window(input->info(), output->info(), padding,
+ constant_value, mode, vec_size);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Set build options
+ std::string kernel_name = "pad_layer_";
+
+ const DataType &data_type = input->info()->data_type();
+ const unsigned int input_width = input->info()->dimension(0);
+ const unsigned int input_height = input->info()->dimension(1);
+ const unsigned int input_depth = input->info()->dimension(2);
+ const unsigned int pad_x_before = padding.at(0).first;
+ const unsigned int pad_y_before = padding.size() > 1 ? padding.at(1).first : 0;
+ const unsigned int pad_z_before = padding.size() > 2 ? padding.at(2).first : 0;
+ const unsigned int pad_right_start = input_width + pad_x_before;
+
+ _input_start_x = mode == PaddingMode::CONSTANT ? -(pad_x_before % vec_size) : 0;
+ _input_start_y = (mode == PaddingMode::CONSTANT && padding.size() > 1) ? -padding.at(1).first : 0;
+
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+ build_opts.add_option("-DSELECT_DT=" + get_cl_select_type_from_data_type(data_type));
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size));
+ build_opts.add_option("-DPAD_X_BEFORE=" + support::cpp11::to_string(pad_x_before));
+ build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input_width));
+ if (padding.size() > 1)
+ {
+ build_opts.add_option("-DPAD_Y_BEFORE=" + support::cpp11::to_string(pad_y_before));
+ build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input_height));
+
+ if (padding.size() > 2)
+ {
+ build_opts.add_option("-DPAD_Z_BEFORE=" + support::cpp11::to_string(pad_z_before));
+ build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input_depth));
+ }
+ }
+
+ switch (mode)
+ {
+ case PaddingMode::CONSTANT:
+ {
+ kernel_name += "constant";
+
+ build_opts.add_option("-DCONST_VAL=" + string_from_pixel_value(constant_value, data_type));
+ build_opts.add_option_if(pad_x_before >= vec_size,
+ "-DNUM_THREADS_TO_SKIP_X=" +
+ support::cpp11::to_string(pad_x_before / vec_size));
+
+ if (_4d_enabled)
+ {
+ build_opts.add_option("-DPAD_W_BEFORE=" + support::cpp11::to_string(padding.at(3).first));
+ build_opts.add_option("-DSRC_BATCH=" +
+ support::cpp11::to_string(input->info()->dimension(3)));
+ }
+
+ break;
+ }
+ case PaddingMode::SYMMETRIC:
+ case PaddingMode::REFLECT:
+ {
+ kernel_name += "symmetric_reflect";
+
+ const auto is_reflect = static_cast<unsigned int>(mode == PaddingMode::REFLECT);
+
+ const unsigned int pad_x_before_remainder = pad_x_before % vec_size;
+ const unsigned int pad_x_after_remainder = pad_right_start % vec_size;
+ const unsigned int after_pad_fact_x = (2 * input_width + pad_x_before) - is_reflect;
+ const unsigned int output_last_x =
+ ceil_to_multiple(pad_right_start + padding.at(0).second, vec_size);
+
+ build_opts.add_option("-DIS_REFLECT=" + support::cpp11::to_string(is_reflect));
+ build_opts.add_option("-DPAD_X_BEFORE_REMAINDER=" +
+ support::cpp11::to_string(pad_x_before_remainder));
+ build_opts.add_option("-DPAD_X_AFTER_REMAINDER=" +
+ support::cpp11::to_string(pad_x_after_remainder));
+ build_opts.add_option(
+ "-DPAD_X_BEFORE_REMAINDER_REFL=" +
+ support::cpp11::to_string((pad_x_before_remainder + is_reflect) % vec_size));
+ build_opts.add_option(
+ "-DPAD_X_AFTER_REMAINDER_REFL=" +
+ support::cpp11::to_string((pad_x_after_remainder - is_reflect) % vec_size));
+ build_opts.add_option("-DAFTER_PAD_FACT_X=" + support::cpp11::to_string(after_pad_fact_x));
+ build_opts.add_option_if(after_pad_fact_x < output_last_x,
+ "-DAFTER_PAD_REM=" +
+ support::cpp11::to_string(after_pad_fact_x % vec_size));
+
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Padding mode not supported.");
+ }
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+}
+
+Status CLPadLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value,
+ PaddingMode mode)
+{
+ unsigned int vec_size;
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, padding, constant_value, mode));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
+ output->clone().get(), padding,
+ constant_value, mode, vec_size)
+ .first);
+
+ return Status{};
+}
+
+void CLPadLayerKernelEx::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ Window win_in = window;
+ win_in.adjust(Window::DimX, _input_start_x, true);
+ win_in.adjust(Window::DimY, _input_start_y, true);
+
+ Window slice_out = window.first_slice_window_3D();
+ Window slice_in = win_in.first_slice_window_3D();
+ unsigned int batch = 0;
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, slice_in);
+ add_3D_tensor_argument(idx, _output, slice_out);
+ if (_4d_enabled)
+ {
+ add_argument<unsigned int>(idx, batch++);
+ }
+
+ enqueue(queue, *this, slice_out, lws_hint());
+ } while (window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in));
+}
+} // namespace arm_compute
#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
namespace arm_compute
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
+#include "src/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
#include <climits>
#include <algorithm>
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
+#include "src/core/NEON/NEAsymm.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/NEON/NEElementwiseOperationFuncs.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/wrapper/wrapper.h"
#include <algorithm>
#include <arm_neon.h>
std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)> configure_func(
const ITensor *input1, const ITensor *input2, ITensor *output,
- std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function)
+ std::map<std::string, cpu::kernels::CpuElementwiseKernel::ElementwiseFunction *> map_function)
{
std::string function_to_call("op_");
function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)>
configure_logic_func(const ITensor *input1, const ITensor *input2, ITensor *output)
{
- static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function = {
- {"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
- {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
+ static std::map<std::string, cpu::kernels::CpuElementwiseKernel::ElementwiseFunction *>
+ map_function = {{"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
+ {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
return configure_func(input1, input2, output, map_function);
}
const ITensor *input2, ITensor *output)
{
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info()));
- configure_common(input1, input2, output);
+ configure_common(input1->info(), input2->info(), output->info());
switch (op)
{
case BinaryLogicalOperation::AND:
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
return Status{};
}
-
} // namespace arm_compute
*/
#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEMath.h"
+#include "src/core/NEON/NEMath.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/SaturateCast.h"
+#include "support/SaturateCast.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
+#include "src/core/NEON/INEKernel.h"
using namespace arm_compute;
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
using namespace arm_compute;
NEEmbeddingLookupKernel::NEEmbeddingLookupKernel()
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/NEFixedPoint.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+#include <arm_neon.h>
+#include <cstddef>
+#include <cstdint>
+#include <mutex>
+
+using namespace arm_compute;
+
+namespace
+{
+inline Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(accum);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != accum->dimension(0));
+
+ return Status{};
+}
+
+inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *accum,
+ ITensorInfo *biases)
+{
+ constexpr unsigned int num_elems_processed_per_iteration = 16;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration));
+
+ bool window_changed = update_window_and_padding(
+ win, AccessWindowHorizontal(accum, 0, num_elems_processed_per_iteration),
+ AccessWindowStatic(biases, 0, 0,
+ ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration),
+ biases->tensor_shape().y()));
+
+ AccessWindowHorizontal output_access(accum, 0, num_elems_processed_per_iteration);
+
+ // Set the valid region for the accum tensor
+ Coordinates coord;
+ coord.set_num_dimensions(accum->num_dimensions());
+ output_access.set_valid_region(win, ValidRegion(coord, accum->tensor_shape()));
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel()
+ : _accum(nullptr), _biases(nullptr)
+{
+}
+
+void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
+
+ // Perform validate step
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info()));
+
+ _biases = biases;
+ _accum = accum;
+
+ // Configure kernel window
+ auto win_config = validate_and_configure_window(accum->info(), biases->info());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ INEKernel::configure(win_config.second);
+}
+
+Status NEGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum,
+ const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(accum->clone().get(), biases->clone().get()).first);
+
+ return Status{};
+}
+
+std::mutex m;
+void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadInfo &info)
+{
+ std::lock_guard<std::mutex> lock_guard(m);
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+
+ Window win_biases;
+ win_biases.set(Window::DimX,
+ Window::Dimension(window.x().start(), window.x().end(), window.x().step()));
+ win_biases.set(Window::DimY, Window::Dimension(0, 1, 1));
+
+ Iterator in0_out(_accum, window);
+ Iterator in1(_biases, win_biases);
+
+ switch (_accum->info()->data_type())
+ {
+ case DataType::F32:
+ {
+ execute_window_loop(
+ window,
+ [&](const Coordinates &) {
+ const float32x4x4_t accum = vld4q_f32(reinterpret_cast<const float *>(in0_out.ptr()));
+ const float32x4x4_t biases = vld4q_f32(reinterpret_cast<const float *>(in1.ptr()));
+ const float32x4x4_t res = {
+ {vaddq_f32(accum.val[0], biases.val[0]), vaddq_f32(accum.val[1], biases.val[1]),
+ vaddq_f32(accum.val[2], biases.val[2]), vaddq_f32(accum.val[3], biases.val[3])}};
+
+ vst4q_f32(reinterpret_cast<float *>(in0_out.ptr()), res);
+ },
+ in0_out, in1);
+ break;
+ }
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::F16:
+ {
+ execute_window_loop(
+ window,
+ [&](const Coordinates &) {
+ const float16x8x2_t accum = vld2q_f16(reinterpret_cast<const float16_t *>(in0_out.ptr()));
+ const float16x8x2_t biases = vld2q_f16(reinterpret_cast<const float16_t *>(in1.ptr()));
+ const float16x8x2_t res = {
+ {vaddq_f16(accum.val[0], biases.val[0]), vaddq_f16(accum.val[1], biases.val[1])}};
+
+ vst2q_f16(reinterpret_cast<float16_t *>(in0_out.ptr()), res);
+ },
+ in0_out, in1);
+ break;
+ }
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ default:
+ ARM_COMPUTE_ERROR("Data type not supported");
+ break;
+ }
+}
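// Usage sketch (illustrative only, not part of the patch): how the kernel above is
// typically wired up with the standard ACL Tensor / NEScheduler APIs. Shapes, types
// and the assumed includes ("arm_compute/runtime/Tensor.h",
// "arm_compute/runtime/NEON/NEScheduler.h") are assumptions, not part of this change.
static void example_accumulate_biases_neon()
{
  Tensor accum, biases;
  accum.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
  biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
  accum.allocator()->allocate();
  biases.allocator()->allocate();

  NEGEMMMatrixAccumulateBiasesKernel kernel;
  ARM_COMPUTE_ERROR_THROW_ON(
    NEGEMMMatrixAccumulateBiasesKernel::validate(accum.info(), biases.info()));
  kernel.configure(&accum, &biases);
  // Split along Y, the same pattern used elsewhere in this patch for NEON kernels.
  NEScheduler::get().schedule(&kernel, Window::DimY);
}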
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
namespace arm_compute
{
namespace
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include <unordered_map>
using namespace arm_compute;
#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEMath.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "src/core/NEON/NEMath.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include <arm_neon.h>
namespace arm_compute
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/INEKernel.h"
+#include "src/core/NEON/NEAsymm.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
* SOFTWARE.
*/
#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
namespace arm_compute
{
namespace
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "src/core/NEON/NEAsymm.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
+
+#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/Utils.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/runtime/Utils.h"
namespace arm_compute
{
"Reduction axis greater than max number of dimensions");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
const unsigned int num_of_stages =
- calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
+ utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
DataType output_data_type = DataType::S32;
TensorInfo not_reshaped_output;
ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate(
    input, &sums_vector[last_stage - 1], &not_reshaped_output, axis, op));
}
- ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(&not_reshaped_output, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayer::validate(&not_reshaped_output, output));
return Status{};
}
const ReductionOperation &op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
+ _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
_reduction_axis = axis;
const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(
&_not_reshaped_output, axis, op);
_results_vector[last_stage - 1].allocator()->allocate();
}
- _reshape_kernel.configure(&_not_reshaped_output, output);
+ _reshape_kernel.configure(CLKernelLibrary::get().get_compile_context(), &_not_reshaped_output,
+ output);
_not_reshaped_output.allocator()->allocate();
}
{
CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
}
- CLScheduler::get().enqueue(_reshape_kernel, false);
+ _reshape_kernel.run();
}
} // namespace arm_compute
#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
using namespace arm_compute;
void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
BinaryLogicalOperation op)
{
- auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
+ auto k = std::make_unique<CLBinaryLogicalOpKernel>();
k->configure(input1, input2, output, op);
_kernel = std::move(k);
ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
if (broadcasted_info->info()->dimension(0) == 1)
{
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
void CLCastBool::configure(ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLCastBoolKernel>();
+ auto k = std::make_unique<CLCastBoolKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include <memory>
#include <tuple>
*/
#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h"
-
#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
using namespace arm_compute;
void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output,
const ICLTensor *lookups)
{
- auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>();
+ auto k = std::make_unique<CLEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/MemorySupport.h"
#include <algorithm>
void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = std::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/MemorySupport.h"
+
+#include "support/Cast.h"
#include <algorithm>
void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = std::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include <arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h>
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "support/StringSupport.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+using namespace arm_compute;
+
+namespace
+{
+Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(accum);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() != 1);
+
+ return Status{};
+}
+
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *accum, ITensorInfo *biases, GPUTarget gpu_target,
+ unsigned int &num_elems_processed_per_iteration)
+{
+ // Select the vector size to use (8 for Bifrost; 16 for Midgard).
+ bool is_gpu_bifrost =
+ gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G76, GPUTarget::G51,
+ GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::G52, GPUTarget::G52LIT);
+ num_elems_processed_per_iteration = is_gpu_bifrost ? 8 : 16;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration));
+
+ AccessWindowStatic biases_access(
+ biases, 0, 0, ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration),
+ biases->dimension(1));
+ AccessWindowHorizontal accum_access(accum, 0, num_elems_processed_per_iteration);
+
+ bool window_changed = update_window_and_padding(win, biases_access, accum_access);
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+CLGEMMMatrixAccumulateBiasesKernel::CLGEMMMatrixAccumulateBiasesKernel()
+ : _accum(nullptr), _biases(nullptr)
+{
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTensor *biases)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), accum, biases);
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *accum, const ICLTensor *biases)
+{
+ ARM_COMPUTE_UNUSED(compile_context);
+ // Perform validate step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info()));
+
+ _biases = biases;
+ _accum = accum;
+
+ // Get the target gpu
+ GPUTarget gpu_target = get_target();
+ unsigned int vector_size = 0;
+
+ // Configure kernel window
+ auto win_config =
+ validate_and_configure_window(accum->info(), biases->info(), gpu_target, vector_size);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Add build options
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type()));
+ build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel("gemm_accumulate_biases", build_opts.options()));
+}
+
+Status CLGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum,
+ const ITensorInfo *biases, GPUTarget gpu_target)
+{
+ unsigned int num_elems_processed_per_iteration = 0;
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(accum->clone().get(),
+ biases->clone().get(), gpu_target,
+ num_elems_processed_per_iteration)
+ .first);
+
+ return Status{};
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
+
+ Window accum_slice = window.first_slice_window_2D();
+
+ Window biases_slice(accum_slice);
+ biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1));
+
+ // Run kernel
+ do
+ {
+ // Set arguments
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _accum, accum_slice);
+ add_1D_tensor_argument(idx, _biases, biases_slice);
+
+ enqueue(queue, *this, accum_slice, lws_hint());
+ } while (window.slide_window_slice_2D(accum_slice));
+}
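// Usage sketch (illustrative only, not part of the patch): the CL variant is driven
// through CLScheduler. An initialized CL context and the assumed includes
// ("arm_compute/runtime/CL/CLTensor.h", "arm_compute/runtime/CL/CLScheduler.h") are
// assumptions; shapes and types are examples only.
static void example_accumulate_biases_cl()
{
  CLScheduler::get().default_init();

  CLTensor accum, biases;
  accum.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
  biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
  accum.allocator()->allocate();
  biases.allocator()->allocate();

  CLGEMMMatrixAccumulateBiasesKernel kernel;
  kernel.configure(&accum, &biases);  // picks the vector size from the GPU target
  CLScheduler::get().enqueue(kernel); // non-blocking enqueue
  CLScheduler::get().sync();
}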
#include "arm_compute/runtime/CL/functions/CLGatherEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/kernels/CLGatherKernel.h"
+
#include "arm_compute/core/CL/kernels/CLGatherExKernel.h"
using namespace arm_compute;
void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output,
int axis)
{
- auto k = support::cpp14::make_unique<CLGatherExKernel>();
+ auto k = std::make_unique<CLGatherExKernel>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys,
const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
{
- auto k = support::cpp14::make_unique<CLHashtableLookupKernel>();
+ auto k = std::make_unique<CLHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
ICLTensor *gamma, ICLTensor *beta, float epsilon)
{
- auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>();
+ auto k = std::make_unique<CLInstanceNormalizationLayerKernelEx>();
k->configure(input, output, gamma, beta, epsilon);
_kernel = std::move(k);
}
void CLNeg::configure(ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>();
+ auto k = std::make_unique<CLNegKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/MemorySupport.h"
+
namespace arm_compute
{
CLOneHot::CLOneHot() : _memset_kernel(), _onehot_kernel(), _has_to_memset(false) {}
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+
+namespace arm_compute
+{
+CLPadLayerEx::CLPadLayerEx()
+ : _pad_kernel(std::make_unique<CLPadLayerKernelEx>()),
+ _copy_kernel(std::make_unique<opencl::kernels::ClCopyKernel>()), _perform_pad(false)
+{
+}
+
+void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value,
+ mode);
+}
+
+void CLPadLayerEx::configure(const CLCompileContext &compile_context, ICLTensor *input,
+ ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate(input->info(), output->info(), padding, constant_value, mode));
+
+ _perform_pad = std::any_of(padding.begin(), padding.end(),
+ [](PaddingInfo info) { return info.first > 0 || info.second > 0; });
+
+ if (_perform_pad)
+ {
+ _pad_kernel->configure(compile_context, input, output, padding, constant_value, mode);
+ }
+ else
+ {
+ Window copy_window = Window();
+ copy_window.use_tensor_dimensions(output->info()->tensor_shape());
+ // Copy the input to the whole output if no padding is applied
+ _copy_kernel->configure(compile_context, input->info(), output->info(), &copy_window);
+ }
+}
+Status CLPadLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value,
+ PaddingMode mode)
+{
+ bool perform_pad = std::any_of(padding.begin(), padding.end(), [](PaddingInfo info) {
+ return info.first > 0 || info.second > 0;
+ });
+
+ if (perform_pad)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLPadLayerKernelEx::validate(input, output, padding, constant_value, mode));
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCopyKernel::validate(input, output));
+ }
+ return Status{};
+}
+void CLPadLayerEx::run()
+{
+ if (_perform_pad)
+ {
+ CLScheduler::get().enqueue(*_pad_kernel);
+ }
+ else
+ {
+ CLScheduler::get().enqueue(*_copy_kernel);
+ }
+}
+} // namespace arm_compute
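// Usage sketch (illustrative only, not part of the patch): pad a CL tensor by one
// element on each side of X and two on each side of Y with a constant zero. The
// tensors are assumed to be configured and allocated by the caller.
static void example_pad_layer_ex(arm_compute::ICLTensor *input, arm_compute::ICLTensor *output)
{
  using namespace arm_compute;
  CLPadLayerEx pad;
  pad.configure(input, output, PaddingList{{1, 1}, {2, 2}}, PixelValue(0.f),
                PaddingMode::CONSTANT);
  pad.run(); // enqueues either the pad kernel or the plain copy, as selected in configure()
}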
ARM_COMPUTE_RETURN_ERROR_ON(num_of_kernels < 1);
// Create temporary tensor infos
- auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
+ auto interm_tensors = std::make_unique<TensorInfo[]>(num_of_interm_tensors);
// Create intermediate tensor info
TensorShape shape{input->tensor_shape()};
throw std::runtime_error("CLReduceOperation: there is no axis to reduce");
}
- _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
+ _interm_tensors = std::make_unique<CLTensor[]>(num_of_interm_tensors);
+ _reduce_kernels = std::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
// Set a vector that is ordered ICLTensors sequentially.
std::vector<ICLTensor *> tensors;
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include <cassert>
using namespace arm_compute;
{
case DeconvolutionMethod::DIRECT:
{
- auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>();
+ auto f = std::make_unique<CLDirectTransposeConvLayer>();
f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right,
invalid_bottom, weights_info);
_function = std::move(f);
}
case DeconvolutionMethod::GEMM:
{
- auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
+ auto f = std::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
f->configure(compile_context, input, weights, bias, output, deconv_info);
_function = std::move(f);
break;
* SOFTWARE.
*/
-#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h>
+#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
#include "arm_compute/core/ITensor.h"
-#include "support/MemorySupport.h"
#include <utility>
void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2,
ITensor *output)
{
- auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = std::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(COP, input1, input2, output);
_kernel = std::move(k);
}
void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output,
BinaryLogicalOperation op)
{
- auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = std::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(op, input1, input2, output);
_kernel = std::move(k);
}
#include "arm_compute/runtime/NEON/functions/NECastBool.h"
#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h"
-#include "support/MemorySupport.h"
using namespace arm_compute;
void NECastBool::configure(const ITensor *input, ITensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<NECastBoolKernel>();
+ auto k = std::make_unique<NECastBoolKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h"
#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
-#include "support/MemorySupport.h"
using namespace arm_compute;
void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups)
{
- auto k = support::cpp14::make_unique<NEEmbeddingLookupKernel>();
+ auto k = std::make_unique<NEEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output)
{
- auto k = support::cpp14::make_unique<NETransposeKernel>();
+ auto k = std::make_unique<NETransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
#include <algorithm>
#include <cmath>
-using namespace arm_compute;
+namespace arm_compute
+{
using namespace arm_compute::misc::shape_calculator;
namespace
const ITensor *biases, ITensor *output,
FullyConnectedLayerInfo fc_info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-
// Perform validate step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate(
input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
fc_info));
(input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input));
input_to_use = &flatten_input;
}
else
if (!_is_prepared)
{
if (!_are_weights_reshaped)
+ {
_reshape_weights_output.allocator()->allocate();
+ }
if (!_are_weights_converted)
+ {
_converted_weights_output.allocator()->allocate();
+ }
_is_prepared = true;
}
// Linearize input if it comes from a convolutional layer
if (_is_fc_after_conv)
{
- NEScheduler::get().schedule(&_flatten_kernel, Window::DimY);
+ _flatten_kernel.run();
}
// Run matrix multiply
}
#endif
}
+} // namespace arm_compute
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h>
+#include "src/core/helpers/AutoConfiguration.h"
+#include <cassert>
using namespace arm_compute;
#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
-#include "support/MemorySupport.h"
#include <utility>
{
void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis)
{
- auto k = support::cpp14::make_unique<NEGatherKernelEx>();
+ auto k = std::make_unique<NEGatherKernelEx>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h"
#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
-#include "support/MemorySupport.h"
using namespace arm_compute;
void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input,
ITensor *output, ITensor *hits)
{
- auto k = support::cpp14::make_unique<NEHashtableLookupKernel>();
+ auto k = std::make_unique<NEHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
*/
#include "arm_compute/runtime/NEON/functions/NEOneHot.h"
#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h"
-#include "support/MemorySupport.h"
+
#include <utility>
namespace arm_compute
{
void NEOneHot::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
const ITensor *off_value, ITensor *output, int axis)
{
- auto k = arm_compute::support::cpp14::make_unique<NEOneHotKernel>();
+ auto k = std::make_unique<NEOneHotKernel>();
k->configure(indices, depth, on_value, off_value, output, axis);
_kernel = std::move(k);
}
#include "arm_compute/runtime/NEON/functions/NEReduceOperation.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/Tensor.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
#include "arm_compute/runtime/NEON/functions/NEReduceSum.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute::misc::shape_calculator;
int32_t zero_point;
float scale;
float *table;
+ uint8_t *uint8_table1;
+ uint8_t *uint8_table2;
};
struct PackParams
#include "Shape.h"
+#include "neon/neon_check.h"
+
#include <algorithm>
#include <cstdint>
#include <fixedpoint/fixedpoint.h>
namespace cker
{
+template <typename T> struct is_quant8
+{
+ static constexpr bool value = std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value;
+};
+
template <typename T>
inline T ActivationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
{
gemmlowp::SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
}
+#ifdef USE_NEON
+inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(int32x4x4_t input_val,
+ int32_t quantized_multiplier, int32_t shift)
+{
+ const int left_shift = std::max(shift, 0);
+ const int right_shift = std::min(shift, 0);
+ int32x4x4_t result;
+
+ int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
+ int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
+ int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
+
+ result.val[0] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ result.val[1] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ result.val[2] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ result.val[3] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ return result;
+}
+#endif
+
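// Note (illustrative, descriptive only): per lane, the NEON helper above computes the
// same value as the scalar MultiplyByQuantizedMultiplier, i.e. approximately
//   result = input * (quantized_multiplier / 2^31) * 2^shift, rounded to nearest.
// For example, quantized_multiplier = 1 << 30 (0.5 in Q31) with shift = -1 maps an
// input of 100 to about 100 * 0.5 * 0.5 = 25 in every lane.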
inline int NodeOffset(int b, int h, int w, int height, int width)
{
return (b * height + h) * width + w;
}
}
+template <>
+void AveragePool<int8_t>(const PoolParams &params, const Shape &input_shape,
+ const int8_t *input_data, const Shape &output_shape, int8_t *output_data)
+{
+ // Here, and in other pooling ops, in order to maintain locality of reference,
+ // to minimize some recalculations, and to load into NEON vector registers, we
+ // use an inner loop down the depth. Since the depth can be large, the temporary
+ // storage needed could be arbitrarily large as well, so we divide the work up
+ // into depth tranches just within the batch loop.
+ static constexpr int kPoolingAccTrancheSize = 256;
+
+ assert(params.quantized_activation_min <= params.quantized_activation_max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ const int stride_height = params.stride_height;
+ const int stride_width = params.stride_width;
+
+ int32_t acc[kPoolingAccTrancheSize];
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ // We proceed through the depth in tranches (see comment above). The
+ // depth_base is the depth at the beginning of the tranche. The
+ // tranche_depth is the depth dimension of the tranche.
+ for (int depth_base = 0; depth_base < depth; depth_base += kPoolingAccTrancheSize)
+ {
+ const int tranche_depth = std::min(depth - depth_base, kPoolingAccTrancheSize);
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
+ const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
+ const int filter_x_start = std::max(0, -in_x_origin);
+ const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
+ const int filter_y_start = std::max(0, -in_y_origin);
+ const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
+ const int filter_count =
+ (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+ memset(acc, 0, tranche_depth * sizeof(acc[0]));
+ const int8_t *input_ptr =
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ for (int fy = filter_y_start; fy < filter_y_end; fy++)
+ {
+ const int8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
+ for (int fx = filter_x_start; fx < filter_x_end; fx++)
+ {
+ const int8_t *input_channel_ptr = input_row_ptr;
+ int channel = 0;
+#ifdef USE_NEON
+ for (; channel <= tranche_depth - 16; channel += 16)
+ {
+ int16x4_t acc_reg[4];
+ int8x16_t input_reg = vld1q_s8(input_channel_ptr);
+ input_channel_ptr += 16;
+ acc_reg[0] = vget_low_s16(vmovl_s8(vget_low_s8(input_reg)));
+ acc_reg[1] = vget_high_s16(vmovl_s8(vget_low_s8(input_reg)));
+ acc_reg[2] = vget_low_s16(vmovl_s8(vget_high_s8(input_reg)));
+ acc_reg[3] = vget_high_s16(vmovl_s8(vget_high_s8(input_reg)));
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc + channel + 4 * i,
+ vaddw_s16(vld1q_s32(acc + channel + 4 * i), acc_reg[i]));
+ }
+ }
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ int16x4_t acc_reg[2];
+ int16x8_t input_reg = vmovl_s8(vld1_s8(input_channel_ptr));
+ input_channel_ptr += 8;
+ acc_reg[0] = vget_low_s16(input_reg);
+ acc_reg[1] = vget_high_s16(input_reg);
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc + channel + 4 * i,
+ vaddw_s16(vld1q_s32(acc + channel + 4 * i), acc_reg[i]));
+ }
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ acc[channel] += *input_channel_ptr++;
+ }
+ input_row_ptr += depth;
+ }
+ }
+ int8_t *output_ptr = output_data + Offset(output_shape, batch, out_y, out_x, depth_base);
+ int channel = 0;
+#ifdef USE_NEON
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ int16_t buf[8];
+ for (int i = 0; i < 8; i++)
+ {
+ buf[i] = acc[channel + i] > 0 ? (acc[channel + i] + filter_count / 2) / filter_count
+ : (acc[channel + i] - filter_count / 2) / filter_count;
+ }
+ int8x8_t buf8 = vqmovn_s16(vld1q_s16(buf));
+ buf8 = vmin_s8(buf8, vdup_n_s8(params.quantized_activation_max));
+ buf8 = vmax_s8(buf8, vdup_n_s8(params.quantized_activation_min));
+ vst1_s8(output_ptr + channel, buf8);
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ int16_t a = acc[channel] > 0 ? (acc[channel] + filter_count / 2) / filter_count
+ : (acc[channel] - filter_count / 2) / filter_count;
+ a = std::max<int16_t>(a, params.quantized_activation_min);
+ a = std::min<int16_t>(a, params.quantized_activation_max);
+ output_ptr[channel] = static_cast<int8_t>(a);
+ }
+ }
+ }
+ }
+ }
+}
+
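// Note (illustrative, descriptive only): the epilogue above divides each accumulated
// sum by the number of contributing input elements with rounding to nearest, e.g.
//   acc = 7,  filter_count = 4  ->  (7 + 2) / 4  =  2   (7/4 = 1.75 rounds to 2)
//   acc = -7, filter_count = 4  ->  (-7 - 2) / 4 = -2   (-1.75 rounds to -2)
// before clamping to [quantized_activation_min, quantized_activation_max].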
} // namespace cker
} // namespace nnfw
}
template <BinaryArithmeticOpType op_type, typename T>
-inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const T *input1_data, const Shape &input2_shape,
- const T *input2_data, const Shape &output_shape, T *output_data)
+inline typename std::enable_if_t<!is_quant8<T>::value>
+BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
reference::BinaryArithmeticOp(params, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, GetBinaryArtithmeticFn<op_type, T>());
}
-template <BinaryArithmeticOpType op_type>
-inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape,
- uint8_t *output_data)
+template <BinaryArithmeticOpType op_type, typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
switch (op_type)
{
case nnfw::cker::BinaryArithmeticOpType::ADD:
case nnfw::cker::BinaryArithmeticOpType::SUB:
- optimized::AddQuant8(params, input1_shape, input1_data, input2_shape, input2_data,
- output_shape, output_data);
+ optimized::Add(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::MUL:
- optimized::MulQuant8(params, input1_shape, const_cast<uint8_t *>(input1_data), input2_shape,
- const_cast<uint8_t *>(input2_data), output_shape, output_data);
+ optimized::Mul(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::DIV:
throw std::runtime_error{"Quant8 Asymm NYI"};
-
default:
assert(false);
break;
}
template <BinaryArithmeticOpType op_type, typename T>
-inline void BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const T *input1_data, const Shape &input2_shape,
- const T *input2_data, const Shape &output_shape,
- T *output_data)
+inline typename std::enable_if_t<!is_quant8<T>::value>
+BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data,
GetBinaryArtithmeticFn<op_type, T>());
}
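// Dispatch sketch (illustrative only): the std::enable_if overloads above and below are
// selected purely by the element type via the is_quant8 trait added earlier in this patch.
static_assert(is_quant8<uint8_t>::value, "uint8_t takes the optimized quantized path");
static_assert(is_quant8<int8_t>::value, "int8_t takes the optimized quantized path");
static_assert(!is_quant8<float>::value, "float takes the reference path");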
-template <BinaryArithmeticOpType op_type>
-inline void BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape,
- uint8_t *output_data)
+template <BinaryArithmeticOpType op_type, typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
switch (op_type)
{
case nnfw::cker::BinaryArithmeticOpType::ADD:
case nnfw::cker::BinaryArithmeticOpType::SUB:
- optimized::BroadcastAddDispatchQuant8(params, input1_shape, input1_data, input2_shape,
- input2_data, output_shape, output_data);
+ optimized::BroadcastAddDispatch(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::MUL:
- optimized::BroadcastMulDispatchQuant8(
- params, input1_shape, const_cast<uint8_t *>(input1_data), input2_shape,
- const_cast<uint8_t *>(input2_data), output_shape, output_data);
+ optimized::BroadcastMulDispatch(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::DIV:
case nnfw::cker::BinaryArithmeticOpType::POW:
}
}
};
-} // functor
+} // namespace functor
template <typename T>
inline void BroadcastTo(const Shape &input_shape, T *input_data, const Shape &output_shape,
}
}
+ void operator()(const ConvParams &params, const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
+ {
+ reference::Conv(params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
+ input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+ }
+ std::vector<int32_t> &per_channel_output_multiplier() { return _per_channel_output_multiplier; }
+ std::vector<int> &per_channel_output_shift() { return _per_channel_output_shift; }
+
private:
bool usableMultiThreaded(PaddingType padding_type, uint32_t dilation_width_factor,
int32_t dilation_height_factor)
Shape _im2col_shape;
bool _need_im2col;
bool _prepared;
+ // Per channel output multiplier and shift.
+ std::vector<int32_t> _per_channel_output_multiplier;
+ std::vector<int> _per_channel_output_shift;
};
} // namespace cker
} // namespace nnfw
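// Usage note (illustrative, an assumption about the expected calling convention): before
// invoking the int8 operator() added above, the caller is expected to fill
// per_channel_output_multiplier()/per_channel_output_shift() with one Q31 multiplier and
// one shift per output channel (derived from input_scale * filter_scale[c] / output_scale),
// which the reference int8 Conv then consumes directly.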
#include "cker/neon/neon_check.h"
#include "cker/operation/optimized/DepthwiseConvFloat.h"
#include "cker/operation/optimized/DepthwiseConvUint8.h"
+#include "cker/operation/optimized/integer_ops/DepthwiseConvInt8.h"
#include "cker/CpuBackendThreadpool.h"
namespace nnfw
{
return Shape::ExtendedShape(shape.DimensionsCount(), shape);
}
-}
+} // namespace
class Einsum
{
{
namespace cker
{
-template <typename T> inline void Fill(const T value_data, const Shape &output_shape, T output_data)
+template <typename T>
+inline void Fill(const T *value_data, const Shape &output_shape, T *output_data)
{
int output_size = output_shape.FlatSize();
for (int i = 0; i < output_size; i++)
}
} // namespace random
-} // namespace tensorflow
-}
+} // namespace cker
+} // namespace nnfw
#endif // __NNFW_CKER_HELPER_RANDOM_DISTRIBUTIONS_H__
};
} // namespace functor
-} // namespace tensorflow
-}
+} // namespace cker
+} // namespace nnfw
#endif // __NNFW_CKER_HELPER_RANDOM_OP_H__
} // namespace functor
-} // end namespace tensorflow
-}
+} // namespace cker
+} // namespace nnfw
#endif // __NNFW_CKER_HELPER_RANDOM_OP_CPU_H__
/*
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
-#include <stdexcept>
+#include <cassert>
#include <iostream>
+#include <stdexcept>
+
namespace nnfw
{
namespace cker
output_data[i] = clamped;
}
}
+
+inline void Quantize(const int32_t *multiplier, const int32_t *shift, int32_t channel_size,
+ int32_t total_size, int32_t output_zp, int32_t output_min, int32_t output_max,
+ int32_t *scratch, int8_t *output)
+{
+ // Here we're trying to quantize the raw accumulators:
+ // output_channels
+ // data data data data data
+ // rows data data data data data
+ // data data data data data
+ // ....
+ //
+ // To avoid reloading the multipliers & shifts, we load them once per block of
+ // channels and quantize the raw accumulators for every row in that block before
+ // moving on to the next block.
+#ifdef USE_NEON
+ const int32x4_t output_offset_vec = vdupq_n_s32(output_zp);
+ const int32x4_t output_activation_min_vec = vdupq_n_s32(output_min);
+ const int32x4_t output_activation_max_vec = vdupq_n_s32(output_max);
+ const int32x4_t zeros = vdupq_n_s32(0);
+#endif
+
+ assert(total_size % channel_size == 0);
+ const int32_t rows = total_size / channel_size;
+
+ int c = 0;
+
+#ifdef USE_NEON
+ using gemmlowp::RoundingDivideByPOT;
+ for (; c <= channel_size - 8; c += 8)
+ {
+ int32x4_t out_shift_1 = vld1q_s32(shift + c);
+ int32x4_t out_shift_2 = vld1q_s32(shift + c + 4);
+ int32x4_t left_shift_1 = vmaxq_s32(out_shift_1, zeros);
+ int32x4_t left_shift_2 = vmaxq_s32(out_shift_2, zeros);
+
+ // Right shift will be performed as left shift with negative values.
+ int32x4_t right_shift_1 = vminq_s32(out_shift_1, zeros);
+ int32x4_t right_shift_2 = vminq_s32(out_shift_2, zeros);
+
+ int32x4_t out_mul_1 = vld1q_s32(multiplier + c);
+ int32x4_t out_mul_2 = vld1q_s32(multiplier + c + 4);
+ for (int n = 0; n < rows; ++n)
+ {
+ int loc = n * channel_size + c;
+ int32x4_t acc_1 = vld1q_s32(scratch + loc);
+ int32x4_t acc_2 = vld1q_s32(scratch + loc + 4);
+
+ // Saturating Rounding Doubling High Mul.
+ acc_1 = vshlq_s32(acc_1, left_shift_1);
+ acc_1 = vqrdmulhq_s32(acc_1, out_mul_1);
+ acc_2 = vshlq_s32(acc_2, left_shift_2);
+ acc_2 = vqrdmulhq_s32(acc_2, out_mul_2);
+
+ // Rounding Dividing By POT.
+ acc_1 = vrshlq_s32(acc_1, right_shift_1);
+ acc_2 = vrshlq_s32(acc_2, right_shift_2);
+
+ // Add the output offset.
+ acc_1 = vaddq_s32(acc_1, output_offset_vec);
+ acc_2 = vaddq_s32(acc_2, output_offset_vec);
+
+ // Apply the activation function.
+ acc_1 = vmaxq_s32(acc_1, output_activation_min_vec);
+ acc_1 = vminq_s32(acc_1, output_activation_max_vec);
+ acc_2 = vmaxq_s32(acc_2, output_activation_min_vec);
+ acc_2 = vminq_s32(acc_2, output_activation_max_vec);
+
+ // Saturating cast to int8 and store to destination.
+ const int16x4_t acc_s16_1 = vqmovn_s32(acc_1);
+ const int16x4_t acc_s16_2 = vqmovn_s32(acc_2);
+ const int16x8_t res_s16 = vcombine_s16(acc_s16_1, acc_s16_2);
+ const int8x8_t res_s8 = vqmovn_s16(res_s16);
+ vst1_s8(output + loc, res_s8);
+ }
+ }
+
+#endif // USE_NEON
+ // Handle leftover values, one by one. This is very slow.
+ for (; c < channel_size; c++)
+ {
+ for (int n = 0; n < rows; ++n)
+ {
+ int loc = n * channel_size + c;
+ int32_t acc = scratch[loc];
+ acc = MultiplyByQuantizedMultiplier(acc, multiplier[c], shift[c]);
+ acc += output_zp;
+ acc = std::max(acc, output_min);
+ acc = std::min(acc, output_max);
+ output[loc] = static_cast<int8_t>(acc);
+ }
+ }
+}
+
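// Usage sketch (illustrative only; the function name, shapes and values are assumptions):
// quantize a 2-row x 4-channel int32 accumulator buffer to int8 with per-channel params.
inline void QuantizePerChannelExample()
{
  int32_t scratch[8] = {100, -50, 7, 0, 25, 13, -3, 90};         // rows x channels, row-major
  int32_t multiplier[4] = {1 << 30, 1 << 30, 1 << 30, 1 << 30};  // ~0.5 in Q31
  int32_t shift[4] = {0, 0, 0, 0};
  int8_t output[8];
  // e.g. scratch[0] = 100 becomes roughly 100 * 0.5 = 50 before clamping to [-128, 127]
  Quantize(multiplier, shift, /*channel_size=*/4, /*total_size=*/8,
           /*output_zp=*/0, /*output_min=*/-128, /*output_max=*/127, scratch, output);
}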
+template <typename input_type, typename output_type>
+inline void Requantize(const input_type *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zeropoint, int32_t output_zeropoint, output_type *output_data)
+{
+ assert(!"Requantize: not supported type. It shouldn't reach here.");
+ UNUSED_ALL(input_data, size, effective_scale_multiplier, effective_scale_shift, input_zeropoint,
+ output_zeropoint, output_data);
+}
+
+template <>
+inline void Requantize<uint8_t, int8_t>(const uint8_t *input_data, int32_t size,
+ int32_t effective_scale_multiplier,
+ int32_t effective_scale_shift, int32_t input_zeropoint,
+ int32_t output_zeropoint, int8_t *output_data)
+{
+ static constexpr int32_t kMinOutput = std::numeric_limits<int8_t>::min();
+ static constexpr int32_t kMaxOutput = std::numeric_limits<int8_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ // Constants.
+ const int32x4_t input_zero_point_dup = vdupq_n_s32(-input_zeropoint);
+ const int32x4_t output_zero_point_dup = vdupq_n_s32(output_zeropoint);
+ const int32x4_t min_val_dup = vdupq_n_s32(kMinOutput);
+ const int32x4_t max_val_dup = vdupq_n_s32(kMaxOutput);
+
+ for (; i <= size - 16; i += 16)
+ {
+ const uint8x16_t input_vec = vld1q_u8(input_data + i);
+ const uint16x8_t first_half = vmovl_u8(vget_low_u8(input_vec));
+ const uint16x8_t second_half = vmovl_u8(vget_high_u8(input_vec));
+ int32x4x4_t input;
+ input.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(first_half)));
+ input.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(first_half)));
+ input.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(second_half)));
+ input.val[3] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(second_half)));
+ input.val[0] = vaddq_s32(input.val[0], input_zero_point_dup);
+ input.val[1] = vaddq_s32(input.val[1], input_zero_point_dup);
+ input.val[2] = vaddq_s32(input.val[2], input_zero_point_dup);
+ input.val[3] = vaddq_s32(input.val[3], input_zero_point_dup);
+
+ int32x4x4_t result =
+ MultiplyByQuantizedMultiplier4Rows(input, effective_scale_multiplier, effective_scale_shift);
+
+ result.val[0] = vaddq_s32(result.val[0], output_zero_point_dup);
+ result.val[1] = vaddq_s32(result.val[1], output_zero_point_dup);
+ result.val[2] = vaddq_s32(result.val[2], output_zero_point_dup);
+ result.val[3] = vaddq_s32(result.val[3], output_zero_point_dup);
+ result.val[0] = vmaxq_s32(vminq_s32(result.val[0], max_val_dup), min_val_dup);
+ result.val[1] = vmaxq_s32(vminq_s32(result.val[1], max_val_dup), min_val_dup);
+ result.val[2] = vmaxq_s32(vminq_s32(result.val[2], max_val_dup), min_val_dup);
+ result.val[3] = vmaxq_s32(vminq_s32(result.val[3], max_val_dup), min_val_dup);
+
+ const int16x4_t narrowed_val_1 = vqmovn_s32(result.val[0]);
+ const int16x4_t narrowed_val_2 = vqmovn_s32(result.val[1]);
+ const int16x4_t narrowed_val_3 = vqmovn_s32(result.val[2]);
+ const int16x4_t narrowed_val_4 = vqmovn_s32(result.val[3]);
+ const int16x8_t output_first_half = vcombine_s16(narrowed_val_1, narrowed_val_2);
+ const int16x8_t output_second_half = vcombine_s16(narrowed_val_3, narrowed_val_4);
+ const int8x8_t narrowed_first_half = vqmovn_s16(output_first_half);
+ const int8x8_t narrowed_second_half = vqmovn_s16(output_second_half);
+ const int8x16_t narrowed_result = vcombine_s8(narrowed_first_half, narrowed_second_half);
+ vst1q_s8(output_data + i, narrowed_result);
+ }
+
+#endif
+ for (; i < size; ++i)
+ {
+ const int32_t input = input_data[i] - input_zeropoint;
+ const int32_t output =
+ MultiplyByQuantizedMultiplier(input, effective_scale_multiplier, effective_scale_shift) +
+ output_zeropoint;
+ const int32_t clamped_output = std::max(std::min(output, kMaxOutput), kMinOutput);
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+}
+
+template <>
+inline void Requantize<int8_t, uint8_t>(const int8_t *input_data, int32_t size,
+ int32_t effective_scale_multiplier,
+ int32_t effective_scale_shift, int32_t input_zeropoint,
+ int32_t output_zeropoint, uint8_t *output_data)
+{
+ static constexpr int32_t kMinOutput = std::numeric_limits<uint8_t>::min();
+ static constexpr int32_t kMaxOutput = std::numeric_limits<uint8_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ // Constants.
+ const int32x4_t input_zero_point_dup = vdupq_n_s32(-input_zeropoint);
+ const int32x4_t output_zero_point_dup = vdupq_n_s32(output_zeropoint);
+ const int32x4_t min_val_dup = vdupq_n_s32(kMinOutput);
+ const int32x4_t max_val_dup = vdupq_n_s32(kMaxOutput);
+
+ for (; i <= size - 16; i += 16)
+ {
+ const int8x16_t input_vec = vld1q_s8(input_data + i);
+ const int16x8_t first_half = vmovl_s8(vget_low_s8(input_vec));
+ const int16x8_t second_half = vmovl_s8(vget_high_s8(input_vec));
+ int32x4x4_t input;
+ input.val[0] = vmovl_s16(vget_low_s16(first_half));
+ input.val[1] = vmovl_s16(vget_high_s16(first_half));
+ input.val[2] = vmovl_s16(vget_low_s16(second_half));
+ input.val[3] = vmovl_s16(vget_high_s16(second_half));
+ input.val[0] = vaddq_s32(input.val[0], input_zero_point_dup);
+ input.val[1] = vaddq_s32(input.val[1], input_zero_point_dup);
+ input.val[2] = vaddq_s32(input.val[2], input_zero_point_dup);
+ input.val[3] = vaddq_s32(input.val[3], input_zero_point_dup);
+
+ int32x4x4_t result =
+ MultiplyByQuantizedMultiplier4Rows(input, effective_scale_multiplier, effective_scale_shift);
+
+ result.val[0] = vaddq_s32(result.val[0], output_zero_point_dup);
+ result.val[1] = vaddq_s32(result.val[1], output_zero_point_dup);
+ result.val[2] = vaddq_s32(result.val[2], output_zero_point_dup);
+ result.val[3] = vaddq_s32(result.val[3], output_zero_point_dup);
+ result.val[0] = vmaxq_s32(vminq_s32(result.val[0], max_val_dup), min_val_dup);
+ result.val[1] = vmaxq_s32(vminq_s32(result.val[1], max_val_dup), min_val_dup);
+ result.val[2] = vmaxq_s32(vminq_s32(result.val[2], max_val_dup), min_val_dup);
+ result.val[3] = vmaxq_s32(vminq_s32(result.val[3], max_val_dup), min_val_dup);
+
+ const uint32x4_t result_val_1_unsigned = vreinterpretq_u32_s32(result.val[0]);
+ const uint32x4_t result_val_2_unsigned = vreinterpretq_u32_s32(result.val[1]);
+ const uint32x4_t result_val_3_unsigned = vreinterpretq_u32_s32(result.val[2]);
+ const uint32x4_t result_val_4_unsigned = vreinterpretq_u32_s32(result.val[3]);
+
+ const uint16x4_t narrowed_val_1 = vqmovn_u32(result_val_1_unsigned);
+ const uint16x4_t narrowed_val_2 = vqmovn_u32(result_val_2_unsigned);
+ const uint16x4_t narrowed_val_3 = vqmovn_u32(result_val_3_unsigned);
+ const uint16x4_t narrowed_val_4 = vqmovn_u32(result_val_4_unsigned);
+ const uint16x8_t output_first_half = vcombine_u16(narrowed_val_1, narrowed_val_2);
+ const uint16x8_t output_second_half = vcombine_u16(narrowed_val_3, narrowed_val_4);
+ const uint8x8_t narrowed_first_half = vqmovn_u16(output_first_half);
+ const uint8x8_t narrowed_second_half = vqmovn_u16(output_second_half);
+ const uint8x16_t narrowed_result = vcombine_u8(narrowed_first_half, narrowed_second_half);
+ vst1q_u8(output_data + i, narrowed_result);
+ }
+
+#endif
+ for (; i < size; ++i)
+ {
+ const int32_t input = input_data[i] - input_zeropoint;
+ const int32_t output =
+ MultiplyByQuantizedMultiplier(input, effective_scale_multiplier, effective_scale_shift) +
+ output_zeropoint;
+ const int32_t clamped_output = std::max(std::min(output, kMaxOutput), kMinOutput);
+ output_data[i] = static_cast<uint8_t>(clamped_output);
+ }
+}
+
} // namespace cker
} // namespace nnfw
input_size *= input_dims[idx];
}
reduce_size = input_dims[input_num_dims - 1];
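+  // offset is the flat index of the first element of the current reduction row
+  // (i.e. idx * reduce_size); the vector loads below are all relative to it.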
+ int offset = 0;
for (int idx = 0; idx < input_size; idx++)
{
int r_idx = 0;
float32x4_t tmp_data_32x4 = vld1q_f32(tmp_data);
for (; r_idx <= reduce_size - 32; r_idx += 32)
{
- float32x4_t a10 = vld1q_f32(input_data + r_idx);
- float32x4_t a11 = vld1q_f32(input_data + r_idx + 4);
- float32x4_t a12 = vld1q_f32(input_data + r_idx + 8);
- float32x4_t a13 = vld1q_f32(input_data + r_idx + 12);
- float32x4_t a20 = vld1q_f32(input_data + r_idx + 16);
- float32x4_t a21 = vld1q_f32(input_data + r_idx + 20);
- float32x4_t a22 = vld1q_f32(input_data + r_idx + 24);
- float32x4_t a23 = vld1q_f32(input_data + r_idx + 28);
+ float32x4_t a10 = vld1q_f32(input_data + offset + r_idx);
+ float32x4_t a11 = vld1q_f32(input_data + offset + r_idx + 4);
+ float32x4_t a12 = vld1q_f32(input_data + offset + r_idx + 8);
+ float32x4_t a13 = vld1q_f32(input_data + offset + r_idx + 12);
+ float32x4_t a20 = vld1q_f32(input_data + offset + r_idx + 16);
+ float32x4_t a21 = vld1q_f32(input_data + offset + r_idx + 20);
+ float32x4_t a22 = vld1q_f32(input_data + offset + r_idx + 24);
+ float32x4_t a23 = vld1q_f32(input_data + offset + r_idx + 28);
float32x4_t x0 = vaddq_f32(a10, a20);
float32x4_t x1 = vaddq_f32(a11, a21);
float32x4_t y2 = vaddq_f32(y0, y1);
tmp_data_32x4 = vaddq_f32(tmp_data_32x4, y2);
}
+ for (; r_idx <= reduce_size - 16; r_idx += 16)
+ {
+ float32x4_t a10 = vld1q_f32(input_data + offset + r_idx);
+ float32x4_t a11 = vld1q_f32(input_data + offset + r_idx + 4);
+ float32x4_t a12 = vld1q_f32(input_data + offset + r_idx + 8);
+ float32x4_t a13 = vld1q_f32(input_data + offset + r_idx + 12);
+
+ float32x4_t x0 = vaddq_f32(a10, a11);
+ float32x4_t x1 = vaddq_f32(a12, a13);
+
+ float32x4_t y0 = vaddq_f32(x0, x1);
+ tmp_data_32x4 = vaddq_f32(tmp_data_32x4, y0);
+ }
for (; r_idx <= reduce_size - 8; r_idx += 8)
{
- float32x4_t a1 = vld1q_f32(input_data + r_idx);
- float32x4_t a2 = vld1q_f32(input_data + r_idx + 4);
+ float32x4_t a1 = vld1q_f32(input_data + offset + r_idx);
+ float32x4_t a2 = vld1q_f32(input_data + offset + r_idx + 4);
float32x4_t x = vaddq_f32(a1, a2);
tmp_data_32x4 = vaddq_f32(tmp_data_32x4, x);
}
{
if (r_idx == 0)
{
- output_data[idx] = input_data[idx * reduce_size];
+ output_data[idx] = input_data[offset];
}
else
{
- output_data[idx] += input_data[idx * reduce_size + r_idx];
+ output_data[idx] += input_data[offset + r_idx];
}
}
+ offset += reduce_size;
}
}
#endif // NEON
batches, input_height, input_width, depth, params.output_height, params.output_width,
height_scale, width_scale, input_shape, input_data, output_data, params.half_pixel_centers);
}
+
+inline void ComputeInterpolationValues(const int32_t value, const int32_t scale_10,
+ const bool half_pixel_centers, int32_t input_size,
+ int32_t *scaled_value, int32_t *lower_bound,
+ int32_t *upper_bound)
+{
+ if (half_pixel_centers)
+ {
+ *scaled_value = value * scale_10 + scale_10 / 2 - (1 << 9);
+ }
+ else
+ {
+ *scaled_value = value * scale_10;
+ }
+ *lower_bound = std::max(*scaled_value / (1 << 10), 0);
+ *upper_bound = std::min(*scaled_value / (1 << 10) + 1, input_size - 1);
+}
+
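+// Quantized (int8) bilinear resize. Scales are kept with 10 fractional bits
+// (the "*_10" values hold input_size / output_size * 1024, rounded), the
+// interpolation weights are accumulated with 20 fractional bits, and the final
+// value is rounded by adding +/- (1 << 19) before dividing by (1 << 20).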
+inline void ResizeBilinear(const ResizeBilinearParams &op_params,
+ const Shape &unextended_input_shape, const int8_t *input_data,
+ const Shape &unextended_output_shape, int8_t *output_data)
+{
+ // If half_pixel_centers is True, align_corners must be False.
+ assert(!op_params.half_pixel_centers || !op_params.align_corners);
+ assert(unextended_input_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_height = input_shape.Dims(1);
+ const int32_t input_width = input_shape.Dims(2);
+ const int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+ const int32_t output_height = op_params.output_height;
+ const int32_t output_width = op_params.output_width;
+
+ int32_t height_scale_10 = ((1 << 10) * input_height + output_height / 2) / output_height;
+ int32_t width_scale_10 = ((1 << 10) * input_width + output_width / 2) / output_width;
+ if (op_params.align_corners && output_height > 1)
+ {
+ height_scale_10 =
+ ((1 << 10) * (input_height - 1) + (output_height - 1) / 2) / (output_height - 1);
+ }
+ if (op_params.align_corners && output_width > 1)
+ {
+ width_scale_10 = ((1 << 10) * (input_width - 1) + (output_width - 1) / 2) / (output_width - 1);
+ }
+
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int y = 0; y < output_height; ++y)
+ {
+ int32_t input_y, y0, y1;
+ ComputeInterpolationValues(y, height_scale_10, op_params.half_pixel_centers, input_height,
+ &input_y, &y0, &y1);
+ for (int x = 0; x < output_width; ++x)
+ {
+ int32_t input_x, x0, x1;
+ ComputeInterpolationValues(x, width_scale_10, op_params.half_pixel_centers, input_width,
+ &input_x, &x0, &x1);
+ for (int c = 0; c < depth; ++c)
+ {
+ const int64_t output_20_ll =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y0, x0, c)]) *
+ ((1 << 10) - (input_y - (1 << 10) * y0)) * ((1 << 10) - (input_x - (1 << 10) * x0));
+ const int64_t output_20_lu =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y1, x0, c)]) *
+ (input_y - (1 << 10) * y0) * ((1 << 10) - (input_x - (1 << 10) * x0));
+ const int64_t output_20_rl =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y0, x1, c)]) *
+ ((1 << 10) - (input_y - (1 << 10) * y0)) * (input_x - (1 << 10) * x0);
+ const int64_t output_20_ru =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y1, x1, c)]) *
+ (input_y - (1 << 10) * y0) * (input_x - (1 << 10) * x0);
+ const int64_t output_20 = output_20_ll + output_20_lu + output_20_rl + output_20_ru;
+ const int64_t round = (output_20 > 0) ? (1 << 19) : -(1 << 19);
+ const int8_t interpolation = static_cast<int8_t>((output_20 + round) / (1 << 20));
+ output_data[Offset(output_shape, b, y, x, c)] = interpolation;
+ }
+ }
+ }
+ }
+}
+
} // namespace cker
} // namespace nnfw
#include "cker/Types.h"
#include "cker/eigen/Utils.h"
+#if __aarch64__ && __clang__
+#define TFLITE_SOFTMAX_USE_UINT16_LUT
+#endif
+
#include <Eigen/Core>
#include <fixedpoint/fixedpoint.h>
#include <cmath>
}
}
}
-}
+} // namespace reference
// Performs softmax along the input of size (input_size * batch_size).
inline void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
out_mat.array().rowwise() *= scale;
}
-inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape,
- const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
-{
- const int32_t input_beta_multiplier = params.input_multiplier;
- const int32_t input_beta_left_shift = params.input_left_shift;
- const int diff_min = params.diff_min;
- // The representation chosen for the input to the exp() function is Q5.26.
- // We need to leave extra space since values that we skip might be as large as
- // -32 before multiplying by input_beta_multiplier, and therefore as large as
- // -16 afterwards. Note that exp(-8) is definitely not insignificant to
- // accumulation, but exp(-16) definitely is.
- static const int kScaledDiffIntegerBits = 5;
- static const int kAccumulationIntegerBits = 12;
- using FixedPointScaledDiff = gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
- using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
- using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
+template <typename T> inline int32_t QuantizeSoftmaxOutput(float prob_rescaled, int32_t zero_point)
+{
+ const int32_t prob_rnd = static_cast<int32_t>(std::round(prob_rescaled));
+ return prob_rnd + zero_point;
+}
+
+#if !__aarch64__
+// On ARM64, std::round is faster than add + truncation, so this
+// specialization is only used on other targets.
+template <> inline int32_t QuantizeSoftmaxOutput<uint8_t>(float prob_rescaled, int32_t)
+{
+ return static_cast<int32_t>(prob_rescaled + 0.5f);
+}
+#endif
+
+inline void PopulateSoftmaxLookupTable(float *table, float input_scale, float beta)
+{
+ const float scale = -input_scale * beta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ {
+ table[max_uint8 - val] = expf(scale * val);
+ }
+}
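+// The table above stores exp(-input_scale * beta * (255 - idx)). In the
+// Softmax below, table_offset points at &table[255 - max_val], so
+// table_offset[x] yields exp(input_scale * beta * (x - max_val)) for a raw
+// quantized input x, i.e. the shifted exponent of a numerically stable softmax.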
+template <typename In, typename Out>
+inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape, const In *input_data,
+ const Shape &output_shape, Out *output_data)
+{
const int trailing_dim = input_shape.DimensionsCount() - 1;
- const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
- const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ const int excluding_last_dim = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int last_dim = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
- for (int i = 0; i < outer_size; ++i)
+ const int32_t clamp_max = std::numeric_limits<Out>::max();
+ const int32_t clamp_min = std::numeric_limits<Out>::min();
+ for (int i = 0; i < excluding_last_dim; ++i)
{
- uint8_t max_in_row = 0;
- for (int c = 0; c < depth; ++c)
+ int32_t max_val = std::numeric_limits<In>::min();
+ // Find max quantized value.
+ for (int j = 0; j < last_dim; ++j)
{
- max_in_row = std::max(max_in_row, input_data[i * depth + c]);
+ max_val = std::max(max_val, static_cast<int32_t>(input_data[j]));
}
- FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
- for (int c = 0; c < depth; ++c)
+ float sum_exp = 0.0f;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const float *table_offset = ¶ms.table[max_uint8 - max_val];
+ // Calculate normalizer sum(exp(x)).
+ for (int j = 0; j < last_dim; ++j)
{
- int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
- if (input_diff >= diff_min)
- {
- const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
- input_diff, input_beta_multiplier, input_beta_left_shift);
- const FixedPointScaledDiff scaled_diff_f8 =
- FixedPointScaledDiff::FromRaw(input_diff_rescaled);
- sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
- exp_on_negative_values(scaled_diff_f8));
- }
+ sum_exp += table_offset[input_data[j]];
}
- int32_t fixed_sum_of_exps = sum_of_exps.raw();
- int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(fixed_sum_of_exps));
- // This is the number of bits to the left of the binary point above 1.0.
- // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and
- // no later adjustment will be needed.
- int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one;
- int32_t shifted_sum_minus_one =
- static_cast<int32_t>((static_cast<uint32_t>(fixed_sum_of_exps) << headroom_plus_one) -
- (static_cast<uint32_t>(1) << 31));
+ const float inv_sum_exp = 1.0f / (sum_exp * params.scale);
+ // Normalize and quantize probabilities.
+ for (int j = 0; j < last_dim; ++j)
+ {
+ const float prob_rescaled = table_offset[input_data[j]] * inv_sum_exp;
+ const int32_t prob_quantized = QuantizeSoftmaxOutput<Out>(prob_rescaled, params.zero_point);
+ output_data[j] = static_cast<Out>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+ }
+ input_data += last_dim;
+ output_data += last_dim;
+ }
+}
+
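+// A minimal usage sketch for the LUT-based Softmax above (assuming a
+// SoftmaxParams `params`, a caller-owned 256-entry float buffer and uint8
+// tensors; names other than the functions above are illustrative):
+//   params.table = table_storage;                 // 256 floats
+//   PopulateSoftmaxLookupTable(params.table, input_scale, beta);
+//   params.zero_point = output_zero_point;
+//   params.scale = output_scale;
+//   Softmax(params, in_shape, in_data, out_shape, out_data);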
+#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
+// Looks up each element of <indices> in <table>, returns them in a vector.
+inline uint8x16_t aarch64_lookup_vector(const uint8x16x4_t table[4], uint8x16_t indices)
+{
+ // Look up in 1st quarter of the table: top 2 bits of indices == 00
+ uint8x16_t output1 = vqtbl4q_u8(table[0], indices);
+ // Look up in 2nd quarter of the table: top 2 bits of indices == 01
+ uint8x16_t output2 = vqtbl4q_u8(table[1], veorq_u8(indices, vdupq_n_u8(0x40)));
+ // Look up in 3rd quarter of the table: top 2 bits of indices == 10
+ uint8x16_t output3 = vqtbl4q_u8(table[2], veorq_u8(indices, vdupq_n_u8(0x80)));
+ // Look up in 4th quarter of the table: top 2 bits of indices == 11
+ uint8x16_t output4 = vqtbl4q_u8(table[3], veorq_u8(indices, vdupq_n_u8(0xc0)));
+
+ // Combine result of the 4 lookups.
+ return vorrq_u8(vorrq_u8(output1, output2), vorrq_u8(output3, output4));
+}
- FixedPoint0 shifted_scale =
- one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shifted_sum_minus_one));
+inline void PopulateSoftmaxUInt8LookupTable(uint8_t *uint8_table1, uint8_t *uint8_table2,
+ float input_scale, float beta)
+{
+ const float scale = input_scale * beta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const int32_t max_uint16 = std::numeric_limits<uint16_t>::max();
- for (int c = 0; c < depth; ++c)
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ {
+ float input_to_exp = scale * (val - max_uint8);
+ int32_t temp = static_cast<int>(expf(input_to_exp) * max_uint16 + 0.5);
+ temp = std::min(max_uint16, temp);
+ uint8_t part1 = temp >> 8;
+ uint8_t part2 = temp & 0xff;
+ uint8_table1[val] = static_cast<uint8_t>(part1);
+ uint8_table2[val] = static_cast<uint8_t>(part2);
+ }
+}
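+// The two tables above hold the high and low bytes of
+// exp(input_scale * beta * (val - 255)) quantized to uint16; SoftmaxInt8LUT
+// below recombines them as (uint8_table1[i] << 8) + uint8_table2[i], both in
+// the vector path (via a widening shift by 8) and in the scalar tail.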
+
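+// FindMaxValue treats int8 inputs as uint8 after XOR-ing with 0x80 (passed in
+// as `offset`), which maps [-128, 127] onto [0, 255] while preserving order,
+// so a single unsigned max reduction works for both input types.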
+inline int FindMaxValue(int size, const uint8_t *input_data, uint8_t offset)
+{
+ int32_t max_val = std::numeric_limits<uint8_t>::min();
+ int j = 0;
+
+ uint8x16_t max_val_dup = vdupq_n_u8(max_val);
+ uint8x16_t offset_dup = vdupq_n_u8(offset);
+ for (; j <= size - 16; j += 16)
+ {
+ uint8x16_t input_value = vld1q_u8(input_data + j);
+ input_value = veorq_u8(input_value, offset_dup);
+ max_val_dup = vmaxq_u8(input_value, max_val_dup);
+ }
+ max_val = std::max(max_val, static_cast<int32_t>(vmaxvq_u8(max_val_dup)));
+
+ for (; j < size; ++j)
+ {
+ max_val = std::max(max_val, static_cast<int32_t>(input_data[j] ^ offset));
+ }
+ return max_val;
+}
+
+#ifdef USE_NEON
+// Value_to_store layout:
+// [high_high, high_low, low_high, low_low].
+inline void StoreValue(int32x4x4_t value_to_store, int8_t *output)
+{
+ const int16x8_t result_1 =
+ vcombine_s16(vqmovn_s32(value_to_store.val[1]), vqmovn_s32(value_to_store.val[0]));
+ const int16x8_t result_2 =
+ vcombine_s16(vqmovn_s32(value_to_store.val[3]), vqmovn_s32(value_to_store.val[2]));
+ const int8x16_t result = vcombine_s8(vqmovn_s16(result_2), vqmovn_s16(result_1));
+ vst1q_s8(output, result);
+}
+
+// Value_to_store layout:
+// [high_high, high_low, low_high, low_low].
+inline void StoreValue(int32x4x4_t value_to_store, uint8_t *output)
+{
+ const uint16x8_t result_1 =
+ vcombine_u16(vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[1])),
+ vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[0])));
+ const uint16x8_t result_2 =
+ vcombine_u16(vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[3])),
+ vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[2])));
+ const uint8x16_t result = vcombine_u8(vqmovn_u16(result_2), vqmovn_u16(result_1));
+ vst1q_u8(output, result);
+}
+
+#endif
+
+template <typename In, typename Out>
+inline void SoftmaxInt8LUT(const SoftmaxParams ¶ms, const Shape &input_shape,
+ const In *input_data, const Shape &output_shape, Out *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int excluding_last_dim = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int last_dim = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+ const int32_t clamp_max = std::numeric_limits<Out>::max();
+ const int32_t clamp_min = std::numeric_limits<Out>::min();
+
+  // The offset is used to interpret the input data "correctly".
+  // If the input is uint8, the data is used unchanged.
+  // If the input is int8, it is reinterpreted as uint8 and the offset flips the
+  // sign bit, e.g. int8 127 becomes 255 in uint8 once the offset is applied.
+ uint8_t offset = 0;
+ if (std::is_same<In, int8_t>::value)
+ {
+ offset = 0x80;
+ }
+
+ const uint8_t *input_data_uint = reinterpret_cast<const uint8_t *>(input_data);
+
+ // This code uses ARM64-only instructions.
+ // TODO(b/143709993): Port to ARMv7
+
+ // Load the tables into registers. (4*4 128-bit registers)
+ uint8x16x4_t table1[4];
+ table1[0] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 0);
+ table1[1] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 1);
+ table1[2] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 2);
+ table1[3] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 3);
+
+ uint8x16x4_t table2[4];
+ table2[0] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 0);
+ table2[1] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 1);
+ table2[2] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 2);
+ table2[3] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 3);
+
+ for (int i = 0; i < excluding_last_dim; ++i)
+ {
+ // Find max quantized value.
+ int32_t max_val = FindMaxValue(last_dim, input_data_uint, offset);
+
+ int32_t sum_exp = 0;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const uint8_t table_offset = max_uint8 - max_val;
+
+ // Calculate normalizer sum(exp(x)).
+ int sum_j = 0;
+ uint8x16_t table_offset_dup = vdupq_n_u8(table_offset);
+ uint8x16_t offset_dup = vdupq_n_u8(offset);
+ uint32x4_t sum_4 = vdupq_n_u32(0);
+ const int multiplier_shift = 8;
+ for (; sum_j <= last_dim - 16; sum_j += 16)
{
- int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
- if (input_diff >= diff_min)
- {
- const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
- input_diff, input_beta_multiplier, input_beta_left_shift);
- const FixedPointScaledDiff scaled_diff_f8 =
- FixedPointScaledDiff::FromRaw(input_diff_rescaled);
-
- FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
- int32_t unsat_output = gemmlowp::RoundingDivideByPOT((shifted_scale * exp_in_0).raw(),
- num_bits_over_unit + 31 - 8);
-
- output_data[i * depth + c] = static_cast<uint8_t>(
- std::max(std::min(unsat_output, static_cast<int32_t>(255)), static_cast<int32_t>(0)));
- }
- else
- {
- output_data[i * depth + c] = 0;
- }
+ uint8x16_t input_value = vld1q_u8(input_data_uint + sum_j);
+ input_value = veorq_u8(input_value, offset_dup);
+ input_value = vaddq_u8(input_value, table_offset_dup);
+
+ const uint8x16_t output1 = aarch64_lookup_vector(table1, input_value);
+ const uint8x16_t output2 = aarch64_lookup_vector(table2, input_value);
+
+ uint16x8_t exp_value1 = vshll_n_u8(vget_high_u8(output1), multiplier_shift);
+ uint16x8_t exp_value2 = vshll_n_u8(vget_low_u8(output1), multiplier_shift);
+
+ exp_value1 = vaddw_u8(exp_value1, vget_high_u8(output2));
+ exp_value2 = vaddw_u8(exp_value2, vget_low_u8(output2));
+
+ sum_4 = vpadalq_u16(sum_4, exp_value1);
+ sum_4 = vpadalq_u16(sum_4, exp_value2);
+ }
+ int temp = vgetq_lane_u32(sum_4, 0) + vgetq_lane_u32(sum_4, 1) + vgetq_lane_u32(sum_4, 2) +
+ vgetq_lane_u32(sum_4, 3);
+ sum_exp += temp;
+
+ for (; sum_j < last_dim; ++sum_j)
+ {
+ const uint8_t index = (input_data_uint[sum_j] ^ offset) + table_offset;
+
+ uint8_t part1 = params.uint8_table1[index];
+ uint8_t part2 = params.uint8_table2[index];
+ sum_exp += ((part1 << 8) + part2);
+ }
+
+ const float inv_sum_exp = 1.0f / (sum_exp * params.scale);
+
+ int32_t multiplier, shift;
+ QuantizeMultiplier(inv_sum_exp, &multiplier, &shift);
+
+ // Normalize and quantize probabilities.
+ int j = 0;
+ const int32x4_t output_zp_dup = vdupq_n_s32(params.zero_point);
+ const int32x4_t max_val_dup = vdupq_n_s32(clamp_max);
+ const int32x4_t min_val_dup = vdupq_n_s32(clamp_min);
+
+ for (; j <= last_dim - 16; j += 16)
+ {
+ uint8x16_t input_value = vld1q_u8(input_data_uint + j);
+ input_value = veorq_u8(input_value, offset_dup);
+ input_value = vaddq_u8(input_value, table_offset_dup);
+
+ const uint8x16_t output1 = aarch64_lookup_vector(table1, input_value);
+ const uint8x16_t output2 = aarch64_lookup_vector(table2, input_value);
+
+ uint16x8_t exp_value1 = vshll_n_u8(vget_high_u8(output1), multiplier_shift);
+ uint16x8_t exp_value2 = vshll_n_u8(vget_low_u8(output1), multiplier_shift);
+
+ exp_value1 = vaddw_u8(exp_value1, vget_high_u8(output2));
+ exp_value2 = vaddw_u8(exp_value2, vget_low_u8(output2));
+
+ int32x4x4_t output_value;
+ output_value.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(exp_value1)));
+ output_value.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(exp_value1)));
+ output_value.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(exp_value2)));
+ output_value.val[3] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(exp_value2)));
+
+ int32x4x4_t temp_val = MultiplyByQuantizedMultiplier4Rows(output_value, multiplier, shift);
+
+ temp_val.val[0] = vaddq_s32(temp_val.val[0], output_zp_dup);
+ temp_val.val[1] = vaddq_s32(temp_val.val[1], output_zp_dup);
+ temp_val.val[2] = vaddq_s32(temp_val.val[2], output_zp_dup);
+ temp_val.val[3] = vaddq_s32(temp_val.val[3], output_zp_dup);
+
+ temp_val.val[0] = vmaxq_s32(vminq_s32(temp_val.val[0], max_val_dup), min_val_dup);
+ temp_val.val[1] = vmaxq_s32(vminq_s32(temp_val.val[1], max_val_dup), min_val_dup);
+ temp_val.val[2] = vmaxq_s32(vminq_s32(temp_val.val[2], max_val_dup), min_val_dup);
+ temp_val.val[3] = vmaxq_s32(vminq_s32(temp_val.val[3], max_val_dup), min_val_dup);
+
+ StoreValue(temp_val, output_data + j);
+ }
+ for (; j < last_dim; ++j)
+ {
+ const uint8_t index = (input_data_uint[j] ^ offset) + table_offset;
+ const uint8_t part1 = params.uint8_table1[index];
+ const uint8_t part2 = params.uint8_table2[index];
+ const int32_t exp_value = (part1 << 8) + part2;
+ const int32_t output_value = MultiplyByQuantizedMultiplier(exp_value, multiplier, shift);
+
+ output_data[j] = static_cast<Out>(
+ std::max(std::min(clamp_max, output_value + params.zero_point), clamp_min));
}
+ input_data_uint += last_dim;
+ output_data += last_dim;
}
}
+#endif
} // namespace cker
} // namespace nnfw
Distribution());
}
-inline void StatelessRandomUniform(const Shape &shape_shape, const int *shape_data,
- const Shape &seed_shape, const int *seed_data,
+inline void StatelessRandomUniform(const Shape &shape_shape, const int32_t *shape_data,
+ const Shape &seed_shape, const int32_t *seed_data,
const Shape &output_shape, float *output_data)
{
Tensor shape_t;
return flat_size;
}
-} // namespace anonymous (util)
+} // namespace
// Transpose2D only deals with typical 2D matrix transpose ops.
// Perform transpose by transposing 4x4 blocks of the input, proceeding from
namespace optimized
{
+// Old version (takes an explicit switch_inputs flag): used for Sub(float) and Div.
template <typename ElementwiseF, typename ScalarBroadcastF, typename T>
inline void BinaryBroadcastFiveFold(const BinaryArithmeticOpParam ¶ms, bool switch_inputs,
const Shape & /* unswitched_input1_shape */,
}
}
-inline int32_t quant8_sum(const BinaryArithmeticOpParam ¶ms, const uint8_t input1_data,
- const uint8_t input2_data)
+// New version: used for Mul, Add and Sub(quant8). Input switching is derived
+// from params.broadcast_category rather than an explicit switch_inputs flag.
+template <typename ElementwiseF, typename ScalarBroadcastF, typename T>
+inline void BinaryBroadcastFiveFold(const BinaryArithmeticOpParam &unswitched_params,
+ const Shape & /* unswitched_input1_shape */,
+ const T *unswitched_input1_data,
+ const Shape & /* unswitched_input2_shape */,
+ const T *unswitched_input2_data,
+ const Shape & /* output_shape */, T *output_data,
+ ElementwiseF elementwise_f, ScalarBroadcastF scalar_broadcast_f)
+{
+ BinaryArithmeticOpParam switched_params = unswitched_params;
+ switched_params.input1_offset = unswitched_params.input2_offset;
+ switched_params.input1_multiplier = unswitched_params.input2_multiplier;
+ switched_params.input1_shift = unswitched_params.input2_shift;
+ switched_params.input2_offset = unswitched_params.input1_offset;
+ switched_params.input2_multiplier = unswitched_params.input1_multiplier;
+ switched_params.input2_shift = unswitched_params.input1_shift;
+
+ const bool use_unswitched =
+ unswitched_params.broadcast_category == BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+ const BinaryArithmeticOpParam ¶ms = use_unswitched ? unswitched_params : switched_params;
+ const T *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+ const T *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+ // Fivefold nested loops. The second input resets its position for each
+ // iteration of the second loop. The first input resets its position at the
+ // beginning of the fourth loop. The innermost loop is an elementwise add of
+ // sections of the arrays.
+ T *output_data_ptr = output_data;
+ const T *input1_data_ptr = input1_data;
+ const T *input2_data_reset = input2_data;
+ // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
+ // between input shapes. y3 for input 1 is always broadcast, and so the
+ // dimension there is 1, whereas optionally y1 might be broadcast for
+ // input 2. Put another way, input1.shape.FlatSize = y0 * y1 * y2 * y4,
+ // input2.shape.FlatSize = y0 * y2 * y3 * y4.
+ int y0 = params.broadcast_shape[0];
+ int y1 = params.broadcast_shape[1];
+ int y2 = params.broadcast_shape[2];
+ int y3 = params.broadcast_shape[3];
+ int y4 = params.broadcast_shape[4];
+ if (y4 > 1)
+ {
+ // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
+ // dimension.
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const T *input2_data_ptr = nullptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ for (int i3 = 0; i3 < y3; ++i3)
+ {
+ elementwise_f(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y4;
+ output_data_ptr += y4;
+ }
+ // We have broadcast y4 of input1 data y3 times, and now move on.
+ input1_data_ptr += y4;
+ }
+ }
+ // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
+ input2_data_reset = input2_data_ptr;
+ }
+ }
+ else
+ {
+ // Special case of y4 == 1, in which the innermost loop is a single
+ // element and can be combined with the next (y3) as an inner broadcast.
+ //
+ // Note that this handles the case of pure scalar broadcast when
+ // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
+ // broadcast with batch (as y2 > 1).
+ //
+ // NOTE The process is the same as the above general case except
+ // simplified for y4 == 1 and the loop over y3 is contained within the
+ // AddScalarBroadcast function.
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const T *input2_data_ptr = nullptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ scalar_broadcast_f(y3, params, *input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y3;
+ output_data_ptr += y3;
+ input1_data_ptr += 1;
+ }
+ }
+ input2_data_reset = input2_data_ptr;
+ }
+ }
+}
+
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value, int32_t>
+quant8_sum(const BinaryArithmeticOpParam ¶ms, const T input1_data, const T input2_data)
{
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t input2_val = params.input2_offset + input2_data;
return clamped_output;
}
-inline void AddElementwiseQuant8(int size, const BinaryArithmeticOpParam ¶ms,
- const uint8_t *input1_data, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void AddElementwise(int size, const BinaryArithmeticOpParam ¶ms,
+ const uint8_t *input1_data, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
}
}
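+// int8 variant of AddElementwise. The NEON path widens each 16-lane vector to
+// four int32x4 registers, shifts by (left_shift + inputX_shift), applies the
+// per-input fixed-point multipliers with vqrdmulhq, rescales the sum with the
+// output multiplier/shift, adds the output offset and clamps before narrowing
+// back to int8. The scalar tail mirrors the uint8 version above.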
+inline void AddElementwise(int size, const BinaryArithmeticOpParam ¶ms,
+ const int8_t *input1_data, const int8_t *input2_data,
+ int8_t *output_data)
+{
+ int i = 0;
+#ifdef USE_NEON
+ const int8x16_t output_activation_min_vector = vdupq_n_s8(params.quantized_activation_min);
+ const int8x16_t output_activation_max_vector = vdupq_n_s8(params.quantized_activation_max);
+
+ const int input1_left_shift = params.left_shift + params.input1_shift;
+ const int input2_left_shift = params.left_shift + params.input2_shift;
+ const int32x4_t input1_left_dup = vdupq_n_s32(input1_left_shift);
+ const int32x4_t input2_left_dup = vdupq_n_s32(input2_left_shift);
+
+ const int16x8_t input1_offset_dup = vdupq_n_s16(params.input1_offset);
+ const int16x8_t input2_offset_dup = vdupq_n_s16(params.input2_offset);
+
+ for (; i <= size - 16; i += 16)
+ {
+ const int8x16_t input1_val_original = vld1q_s8(input1_data + i);
+ const int8x16_t input2_val_original = vld1q_s8(input2_data + i);
+
+ const int16x8_t input1_val_s16_high = vmovl_s8(vget_high_s8(input1_val_original));
+ const int16x8_t input1_val_s16_low = vmovl_s8(vget_low_s8(input1_val_original));
+
+ const int16x8_t input2_val_s16_high = vmovl_s8(vget_high_s8(input2_val_original));
+ const int16x8_t input2_val_s16_low = vmovl_s8(vget_low_s8(input2_val_original));
+ const int16x8_t input1_val_high = vaddq_s16(input1_val_s16_high, input1_offset_dup);
+ const int16x8_t input2_val_high = vaddq_s16(input2_val_s16_high, input2_offset_dup);
+ const int16x8_t input1_val_low = vaddq_s16(input1_val_s16_low, input1_offset_dup);
+ const int16x8_t input2_val_low = vaddq_s16(input2_val_s16_low, input2_offset_dup);
+ const int16x4_t input1_val_high_high = vget_high_s16(input1_val_high);
+ const int16x4_t input1_val_high_low = vget_low_s16(input1_val_high);
+ const int16x4_t input1_val_low_high = vget_high_s16(input1_val_low);
+ const int16x4_t input1_val_low_low = vget_low_s16(input1_val_low);
+ const int16x4_t input2_val_high_high = vget_high_s16(input2_val_high);
+ const int16x4_t input2_val_high_low = vget_low_s16(input2_val_high);
+ const int16x4_t input2_val_low_high = vget_high_s16(input2_val_low);
+ const int16x4_t input2_val_low_low = vget_low_s16(input2_val_low);
+ int32x4_t x111 = vmovl_s16(input1_val_low_low);
+ int32x4_t x112 = vmovl_s16(input1_val_low_high);
+ int32x4_t x121 = vmovl_s16(input1_val_high_low);
+ int32x4_t x122 = vmovl_s16(input1_val_high_high);
+ int32x4_t x211 = vmovl_s16(input2_val_low_low);
+ int32x4_t x212 = vmovl_s16(input2_val_low_high);
+ int32x4_t x221 = vmovl_s16(input2_val_high_low);
+ int32x4_t x222 = vmovl_s16(input2_val_high_high);
+
+ x111 = vshlq_s32(x111, input1_left_dup);
+ x112 = vshlq_s32(x112, input1_left_dup);
+ x121 = vshlq_s32(x121, input1_left_dup);
+ x122 = vshlq_s32(x122, input1_left_dup);
+ x211 = vshlq_s32(x211, input2_left_dup);
+ x212 = vshlq_s32(x212, input2_left_dup);
+ x221 = vshlq_s32(x221, input2_left_dup);
+ x222 = vshlq_s32(x222, input2_left_dup);
+ x111 = vqrdmulhq_n_s32(x111, params.input1_multiplier);
+ x112 = vqrdmulhq_n_s32(x112, params.input1_multiplier);
+ x121 = vqrdmulhq_n_s32(x121, params.input1_multiplier);
+ x122 = vqrdmulhq_n_s32(x122, params.input1_multiplier);
+ x211 = vqrdmulhq_n_s32(x211, params.input2_multiplier);
+ x212 = vqrdmulhq_n_s32(x212, params.input2_multiplier);
+ x221 = vqrdmulhq_n_s32(x221, params.input2_multiplier);
+ x222 = vqrdmulhq_n_s32(x222, params.input2_multiplier);
+ int32x4_t s11 = vaddq_s32(x111, x211);
+ int32x4_t s12 = vaddq_s32(x112, x212);
+ int32x4_t s21 = vaddq_s32(x121, x221);
+ int32x4_t s22 = vaddq_s32(x122, x222);
+ s11 = vqrdmulhq_n_s32(s11, params.output_multiplier);
+ s12 = vqrdmulhq_n_s32(s12, params.output_multiplier);
+ s21 = vqrdmulhq_n_s32(s21, params.output_multiplier);
+ s22 = vqrdmulhq_n_s32(s22, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ s11 = RoundingDivideByPOT(s11, -params.output_shift);
+ s12 = RoundingDivideByPOT(s12, -params.output_shift);
+ s21 = RoundingDivideByPOT(s21, -params.output_shift);
+ s22 = RoundingDivideByPOT(s22, -params.output_shift);
+ const int16x4_t s11_narrowed = vmovn_s32(s11);
+ const int16x4_t s12_narrowed = vmovn_s32(s12);
+ const int16x4_t s21_narrowed = vmovn_s32(s21);
+ const int16x4_t s22_narrowed = vmovn_s32(s22);
+ const int16x8_t s1 =
+ vaddq_s16(vcombine_s16(s11_narrowed, s12_narrowed), vdupq_n_s16(params.output_offset));
+ const int16x8_t s2 =
+ vaddq_s16(vcombine_s16(s21_narrowed, s22_narrowed), vdupq_n_s16(params.output_offset));
+ const int8x16_t s = vcombine_s8(vqmovn_s16(s1), vqmovn_s16(s2));
+
+ const int8x16_t clamped =
+ vmaxq_s8(output_activation_min_vector, vminq_s8(output_activation_max_vector, s));
+ vst1q_s8(output_data + i, clamped);
+ }
+#endif // NEON
+
+ for (; i < size; ++i)
+ {
+ const int32_t input1_val = params.input1_offset + input1_data[i];
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+ const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
+ const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, params.output_multiplier, params.output_shift) +
+ params.output_offset;
+ const int32_t clamped_output = std::min(params.quantized_activation_max,
+ std::max(params.quantized_activation_min, raw_output));
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+}
+
struct BinaryOpFuncAddFloat
{
#ifdef USE_NEON
BinaryOpScalarBroadcast<FUNC, BinaryOpActivationFloatMinMax>);
}
-inline void AddQuant8(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape, uint8_t *output_data)
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+Add(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data)
{
const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
- AddElementwiseQuant8(flat_size, params, input1_data, input2_data, output_data);
+ AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Add(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
// Scalar-broadcast add that can be used for inner loop of more general
// broadcast add, so that, for example, scalar-broadcast with batch will still
// be fast.
-inline void AddScalarBroadcastQuant8(int size, const BinaryArithmeticOpParam ¶ms,
- uint8_t broadcast_value, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void AddScalarBroadcast(int size, const BinaryArithmeticOpParam ¶ms,
+ uint8_t broadcast_value, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
int32_t clamped_output;
}
}
-inline void BroadcastAddDispatchQuant8(const BinaryArithmeticOpParam ¶ms,
- const Shape &input1_shape, const uint8_t *input1_data,
- const Shape &input2_shape, const uint8_t *input2_data,
- const Shape &output_shape, uint8_t *output_data)
+// Scalar-broadcast add that can be used for inner loop of more general
+// broadcast add, so that, for example, scalar-broadcast with batch will still
+// be fast.
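+// In this int8 variant the broadcast scalar is offset, shifted and rescaled
+// once up front (x11/x12), so the inner loop only has to process input2.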
+inline void AddScalarBroadcast(int size, const BinaryArithmeticOpParam ¶ms, int8_t input1_data,
+ const int8_t *input2_data, int8_t *output_data)
{
- if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
+ using gemmlowp::RoundingDivideByPOT;
+ int i = 0;
+#ifdef USE_NEON
+ const int32x4_t left_shift_dup = vdupq_n_s32(params.left_shift);
+ const int8x8_t output_activation_min_vector = vdup_n_s8(params.quantized_activation_min);
+ const int8x8_t output_activation_max_vector = vdup_n_s8(params.quantized_activation_max);
+
+ // Process broadcast scalar.
+ const int8x8_t input1_val_original = vdup_n_s8(input1_data);
+ const int16x8_t input1_val_s16 = vmovl_s8(input1_val_original);
+ const int16x8_t input1_val = vaddq_s16(input1_val_s16, vdupq_n_s16(params.input1_offset));
+ const int16x4_t input1_val_high = vget_high_s16(input1_val);
+ const int16x4_t input1_val_low = vget_low_s16(input1_val);
+ int32x4_t x11 = vmovl_s16(input1_val_low);
+ int32x4_t x12 = vmovl_s16(input1_val_high);
+ x11 = vshlq_s32(x11, left_shift_dup);
+ x12 = vshlq_s32(x12, left_shift_dup);
+ x11 = vqrdmulhq_n_s32(x11, params.input1_multiplier);
+ x12 = vqrdmulhq_n_s32(x12, params.input1_multiplier);
+ const int32x4_t input1_shift_dup = vdupq_n_s32(params.input1_shift);
+ x11 = vshlq_s32(x11, input1_shift_dup);
+ x12 = vshlq_s32(x12, input1_shift_dup);
+
+ for (; i <= size - 8; i += 8)
{
- const std::function<uint8_t(const BinaryArithmeticOpParam &, const uint8_t &, const uint8_t &)>
- fn =
- [](const BinaryArithmeticOpParam ¶ms, const uint8_t &a, const uint8_t &b) -> uint8_t {
- return static_cast<uint8_t>(quant8_sum(params, a, b));
- };
- reference::BroadcastBinaryArithmeticOpSlowQuant8(
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn);
+ const int8x8_t input2_val_original = vld1_s8(input2_data + i);
+ const int16x8_t input2_val_s16 = vmovl_s8(input2_val_original);
+ const int16x8_t input2_val = vaddq_s16(input2_val_s16, vdupq_n_s16(params.input2_offset));
+ const int16x4_t input2_val_high = vget_high_s16(input2_val);
+ const int16x4_t input2_val_low = vget_low_s16(input2_val);
+ int32x4_t x21 = vmovl_s16(input2_val_low);
+ int32x4_t x22 = vmovl_s16(input2_val_high);
+ x21 = vshlq_s32(x21, left_shift_dup);
+ x22 = vshlq_s32(x22, left_shift_dup);
+ x21 = vqrdmulhq_n_s32(x21, params.input2_multiplier);
+ x22 = vqrdmulhq_n_s32(x22, params.input2_multiplier);
+ const int32x4_t input2_shift_dup = vdupq_n_s32(params.input2_shift);
+ x21 = vshlq_s32(x21, input2_shift_dup);
+ x22 = vshlq_s32(x22, input2_shift_dup);
+ int32x4_t s1 = vaddq_s32(x11, x21);
+ int32x4_t s2 = vaddq_s32(x12, x22);
+ s1 = vqrdmulhq_n_s32(s1, params.output_multiplier);
+ s2 = vqrdmulhq_n_s32(s2, params.output_multiplier);
+ s1 = RoundingDivideByPOT(s1, -params.output_shift);
+ s2 = RoundingDivideByPOT(s2, -params.output_shift);
+ const int16x4_t s1_narrowed = vmovn_s32(s1);
+ const int16x4_t s2_narrowed = vmovn_s32(s2);
+ const int16x8_t s =
+ vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset));
+ const int8x8_t clamped =
+ vmax_s8(output_activation_min_vector, vmin_s8(output_activation_max_vector, vqmovn_s16(s)));
+ vst1_s8(output_data + i, clamped);
}
- else
+#endif // NEON
+
+ if (i < size)
{
- BinaryBroadcastFiveFold(
- params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
- input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, const uint8_t *, const uint8_t *,
- uint8_t *)>(AddElementwiseQuant8),
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, uint8_t, const uint8_t *,
- uint8_t *)>(AddScalarBroadcastQuant8));
+ // Process broadcast scalar.
+ const int32_t input1_val = params.input1_offset + input1_data;
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+ const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
+
+ for (; i < size; ++i)
+ {
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+ const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, params.output_multiplier, params.output_shift) +
+ params.output_offset;
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output));
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+ }
+}
+
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BroadcastAddDispatch(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
+{
+ if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
+ {
+ const std::function<T(const BinaryArithmeticOpParam &, const T &, const T &)> fn =
+ [](const BinaryArithmeticOpParam ¶ms, const T &a, const T &b) {
+ return static_cast<T>(quant8_sum(params, a, b));
+ };
+ reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data, fn);
+ return;
}
+
+ BinaryBroadcastFiveFold(
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, const T *, const T *, T *)>(
+ AddElementwise),
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, T, const T *, T *)>(
+ AddScalarBroadcast));
}
inline void BroadcastAddDispatch(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
}
}
-inline int32_t quant8_mul(const BinaryArithmeticOpParam ¶ms, const uint8_t input1_data,
- const uint8_t input2_data)
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value, int32_t>
+quant8_mul(const BinaryArithmeticOpParam ¶ms, const T input1_data, const T input2_data)
{
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t input2_val = params.input2_offset + input2_data;
return clamped_output;
}
-inline void MulElementwiseQuant8(int size, const BinaryArithmeticOpParam ¶ms,
- const uint8_t *input1_data, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void MulElementwise(int size, const BinaryArithmeticOpParam ¶ms,
+ const uint8_t *input1_data, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
}
}
-inline void MulQuant8(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape, uint8_t *output_data)
+inline void MulElementwise(int size, const BinaryArithmeticOpParam ¶ms,
+ const int8_t *input1_data, const int8_t *input2_data,
+ int8_t *output_data)
+{
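+  // int8 variant of MulElementwise: widen to int16, add the input offsets,
+  // multiply pairwise into int32, then rescale with the output multiplier and
+  // a split left/right output shift before narrowing, offsetting and clamping.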
+ int i = 0;
+#ifdef USE_NEON
+ const int16x8_t input1_offset_vector = vdupq_n_s16(params.input1_offset);
+ const int16x8_t input2_offset_vector = vdupq_n_s16(params.input2_offset);
+ const int16x8_t output_offset_vector = vdupq_n_s16(params.output_offset);
+ const auto output_activation_min_vector = vdupq_n_s8(params.quantized_activation_min);
+ const auto output_activation_max_vector = vdupq_n_s8(params.quantized_activation_max);
+ const int left_shift = std::max(0, params.output_shift);
+ const int right_shift = std::max(0, -params.output_shift);
+ const int32x4_t left_shift_vec = vdupq_n_s32(left_shift);
+ for (; i <= size - 16; i += 16)
+ {
+ // We load / store 16 at a time, multiplying as four sets of 4 int32s.
+ const int8x16_t input1_val_original = vld1q_s8(input1_data + i);
+ const int8x16_t input2_val_original = vld1q_s8(input2_data + i);
+
+ const int16x8_t input1_val_s16_high = vmovl_s8(vget_high_s8(input1_val_original));
+ const int16x8_t input1_val_s16_low = vmovl_s8(vget_low_s8(input1_val_original));
+
+ const int16x8_t input2_val_s16_high = vmovl_s8(vget_high_s8(input2_val_original));
+ const int16x8_t input2_val_s16_low = vmovl_s8(vget_low_s8(input2_val_original));
+ const int16x8_t input1_val_high = vaddq_s16(input1_val_s16_high, input1_offset_vector);
+ const int16x8_t input2_val_high = vaddq_s16(input2_val_s16_high, input2_offset_vector);
+ const int16x8_t input1_val_low = vaddq_s16(input1_val_s16_low, input1_offset_vector);
+ const int16x8_t input2_val_low = vaddq_s16(input2_val_s16_low, input2_offset_vector);
+ const int16x4_t input1_val_high_high = vget_high_s16(input1_val_high);
+ const int16x4_t input1_val_high_low = vget_low_s16(input1_val_high);
+ const int16x4_t input1_val_low_high = vget_high_s16(input1_val_low);
+ const int16x4_t input1_val_low_low = vget_low_s16(input1_val_low);
+ const int16x4_t input2_val_high_high = vget_high_s16(input2_val_high);
+ const int16x4_t input2_val_high_low = vget_low_s16(input2_val_high);
+ const int16x4_t input2_val_low_high = vget_high_s16(input2_val_low);
+ const int16x4_t input2_val_low_low = vget_low_s16(input2_val_low);
+
+ auto p1 = vmull_s16(input2_val_high_high, input1_val_high_high);
+ auto p2 = vmull_s16(input2_val_high_low, input1_val_high_low);
+ auto p3 = vmull_s16(input2_val_low_high, input1_val_low_high);
+ auto p4 = vmull_s16(input2_val_low_low, input1_val_low_low);
+
+ p1 = vshlq_s32(p1, left_shift_vec);
+ p2 = vshlq_s32(p2, left_shift_vec);
+ p3 = vshlq_s32(p3, left_shift_vec);
+ p4 = vshlq_s32(p4, left_shift_vec);
+
+ p1 = vqrdmulhq_n_s32(p1, params.output_multiplier);
+ p2 = vqrdmulhq_n_s32(p2, params.output_multiplier);
+ p3 = vqrdmulhq_n_s32(p3, params.output_multiplier);
+ p4 = vqrdmulhq_n_s32(p4, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ p1 = RoundingDivideByPOT(p1, right_shift);
+ p2 = RoundingDivideByPOT(p2, right_shift);
+ p3 = RoundingDivideByPOT(p3, right_shift);
+ p4 = RoundingDivideByPOT(p4, right_shift);
+
+ const auto p1_narrowed = vqmovn_s32(p1);
+ const auto p2_narrowed = vqmovn_s32(p2);
+ const auto p3_narrowed = vqmovn_s32(p3);
+ const auto p4_narrowed = vqmovn_s32(p4);
+
+ const int16x8_t p_part1 =
+ vaddq_s16(vcombine_s16(p2_narrowed, p1_narrowed), output_offset_vector);
+ const int16x8_t p_part2 =
+ vaddq_s16(vcombine_s16(p4_narrowed, p3_narrowed), output_offset_vector);
+ const int8x16_t p = vcombine_s8(vqmovn_s16(p_part2), vqmovn_s16(p_part1));
+
+ const auto clamped =
+ vmaxq_s8(output_activation_min_vector, vminq_s8(output_activation_max_vector, p));
+ vst1q_s8(output_data + i, clamped);
+ }
+#endif // NEON
+
+ for (; i < size; ++i)
+ {
+ const int32_t input1_val = params.input1_offset + input1_data[i];
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t unclamped_result =
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+}
+
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+Mul(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data)
{
const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
- MulElementwiseQuant8(flat_size, params, input1_data, input2_data, output_data);
+ MulElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Mul(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
(*implFuncs.first)(flat_size, params, input1_data, input2_data, output_data);
}
-inline void MulSimpleBroadcastQuant8(int size, const BinaryArithmeticOpParam ¶ms,
- const uint8_t broadcast_value, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void MulSimpleBroadcast(int size, const BinaryArithmeticOpParam ¶ms,
+ const uint8_t broadcast_value, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
int32_t clamped_output;
}
}
-inline void BroadcastMulDispatchQuant8(const BinaryArithmeticOpParam ¶ms,
- const Shape &input1_shape, const uint8_t *input1_data,
- const Shape &input2_shape, const uint8_t *input2_data,
- const Shape &output_shape, uint8_t *output_data)
+// Broadcast mul that can often be used for inner loop of broadcast Mul.
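+// The broadcast scalar is offset once outside the loop; each 16-lane iteration
+// only loads, offsets and multiplies input2 against it with vmull_n_s16.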
+inline void MulSimpleBroadcast(int size, const BinaryArithmeticOpParam ¶ms,
+ const int8_t broadcast_value, const int8_t *input2_data,
+ int8_t *output_data)
+{
+ const int16_t input1_val = params.input1_offset + broadcast_value;
+
+ int i = 0;
+#ifdef USE_NEON
+ const auto input2_offset_vector = vdupq_n_s16(params.input2_offset);
+ const auto output_offset_vector = vdupq_n_s16(params.output_offset);
+ const auto output_activation_min_vector = vdupq_n_s8(params.quantized_activation_min);
+ const auto output_activation_max_vector = vdupq_n_s8(params.quantized_activation_max);
+ const int left_shift = std::max(0, params.output_shift);
+ const int right_shift = std::max(0, -params.output_shift);
+ const int32x4_t left_shift_vec = vdupq_n_s32(left_shift);
+ for (; i <= size - 16; i += 16)
+ {
+ // We load / store 16 at a time, multiplying as four sets of 4 int32s.
+ const auto input2_val_original = vld1q_s8(input2_data + i);
+ const auto input2_val_s16_high = vmovl_s8(vget_high_s8(input2_val_original));
+ const auto input2_val_s16_low = vmovl_s8(vget_low_s8(input2_val_original));
+
+ const auto input2_val_high = vaddq_s16(input2_val_s16_high, input2_offset_vector);
+ const auto input2_val_low = vaddq_s16(input2_val_s16_low, input2_offset_vector);
+
+ const auto input2_val_low_low = vget_low_s16(input2_val_low);
+ const auto input2_val_low_high = vget_high_s16(input2_val_low);
+ const auto input2_val_high_low = vget_low_s16(input2_val_high);
+ const auto input2_val_high_high = vget_high_s16(input2_val_high);
+
+ auto p1 = vmull_n_s16(input2_val_high_high, input1_val);
+ auto p2 = vmull_n_s16(input2_val_high_low, input1_val);
+ auto p3 = vmull_n_s16(input2_val_low_high, input1_val);
+ auto p4 = vmull_n_s16(input2_val_low_low, input1_val);
+
+ p1 = vshlq_s32(p1, left_shift_vec);
+ p2 = vshlq_s32(p2, left_shift_vec);
+ p3 = vshlq_s32(p3, left_shift_vec);
+ p4 = vshlq_s32(p4, left_shift_vec);
+
+ p1 = vqrdmulhq_n_s32(p1, params.output_multiplier);
+ p2 = vqrdmulhq_n_s32(p2, params.output_multiplier);
+ p3 = vqrdmulhq_n_s32(p3, params.output_multiplier);
+ p4 = vqrdmulhq_n_s32(p4, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ p1 = RoundingDivideByPOT(p1, right_shift);
+ p2 = RoundingDivideByPOT(p2, right_shift);
+ p3 = RoundingDivideByPOT(p3, right_shift);
+ p4 = RoundingDivideByPOT(p4, right_shift);
+
+ const auto p1_narrowed = vqmovn_s32(p1);
+ const auto p2_narrowed = vqmovn_s32(p2);
+ const auto p3_narrowed = vqmovn_s32(p3);
+ const auto p4_narrowed = vqmovn_s32(p4);
+
+ const int16x8_t p_part1 =
+ vaddq_s16(vcombine_s16(p2_narrowed, p1_narrowed), output_offset_vector);
+ const int16x8_t p_part2 =
+ vaddq_s16(vcombine_s16(p4_narrowed, p3_narrowed), output_offset_vector);
+ const int8x16_t p = vcombine_s8(vqmovn_s16(p_part2), vqmovn_s16(p_part1));
+
+ const auto clamped =
+ vmaxq_s8(output_activation_min_vector, vminq_s8(output_activation_max_vector, p));
+ vst1q_s8(output_data + i, clamped);
+ }
+#endif // NEON
+
+ for (; i < size; ++i)
+ {
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t unclamped_result =
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+}
+
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BroadcastMulDispatch(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
{
- const std::function<uint8_t(const BinaryArithmeticOpParam &, const uint8_t &, const uint8_t &)>
- fn =
- [](const BinaryArithmeticOpParam ¶ms, const uint8_t &a, const uint8_t &b) -> uint8_t {
- return static_cast<uint8_t>(quant8_mul(params, a, b));
- };
- reference::BroadcastBinaryArithmeticOpSlowQuant8(
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn);
+ const std::function<T(const BinaryArithmeticOpParam &, const T &, const T &)> fn =
+ [](const BinaryArithmeticOpParam ¶ms, const T &a, const T &b) {
+ return static_cast<T>(quant8_mul(params, a, b));
+ };
+ reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data, fn);
return;
}
BinaryBroadcastFiveFold(
- params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
- input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, const uint8_t *, const uint8_t *,
- uint8_t *)>(MulElementwiseQuant8),
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, uint8_t, const uint8_t *,
- uint8_t *)>(MulSimpleBroadcastQuant8));
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, const T *, const T *, T *)>(
+ MulElementwise),
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, T, const T *, T *)>(
+ MulSimpleBroadcast));
}
inline void BroadcastMulDispatch(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
return;
}
auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncMulFloat>(params);
- BinaryBroadcastFiveFold(
- params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
- input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- implFuncs.first, implFuncs.second);
+ BinaryBroadcastFiveFold(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data, implFuncs.first, implFuncs.second);
}
inline void Div(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
}
}
-} // nnfw
-} // cker
-} // optimized
+} // namespace optimized
+} // namespace cker
+} // namespace nnfw
#endif
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_INT8_H__
+#define __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_INT8_H__
+
+#include "cker/CpuBackendThreadpool.h"
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include "cker/neon/neon_check.h"
+#include "cker/operation/Quantize.h"
+
+#include <fixedpoint/fixedpoint.h>
+#include <public/gemmlowp.h>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized_integer_ops
+{
+
+// Category of depthwise convolution output rounding.
+enum class DepthwiseConvOutputRounding
+{
+ kNone = 0, // Invalid: specific method must be specified.
+ kAwayFromZero, // Original method: exact halves rounded away from zero.
+ kUpward, // Halves towards +infinity: adds 0.5 before truncate.
+ // This is where a future kNearestEven would be placed.
+};
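+
+// Example (illustrative): for a raw scaled value of +2.5 both modes give 3;
+// for -2.5, kAwayFromZero gives -3 while kUpward (add 0.5, then truncate
+// towards -infinity) gives -2. The two modes only differ on negative exact
+// halves.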
+
+// Category of depthwise convolution depth multiplication.
+enum class DepthwiseConvDepthMultiplication
+{
+ kNoMultiplication = 0, // Depth multiplier = 1.
+ kUnitInputDepth, // Input depth = 1, output depth = depth multiplier.
+};
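+
+// In both cases output_depth == input_depth * depth_multiplier; e.g. an input
+// depth of 1 with a depth multiplier of 8 produces 8 output channels.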
+
+namespace depthwise_conv
+{
+
+// Implementation of quantized DepthwiseConv
+
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+struct QuantizedDepthwiseConvKernel
+{
+};
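+
+// The primary template above is deliberately left empty: only the
+// (kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier) combinations
+// specialized below provide a Run() method, and shapes that match none of
+// them are expected to fall back to the generic row accumulator further down.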
+
+#ifdef USE_NEON
+template <> struct QuantizedDepthwiseConvKernel<true, 8, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8x2_t filter_s8;
+ filter_s8.val[0] = vld1_s8(filter_ptr);
+ filter_s8.val[1] = vld1_s8(filter_ptr + 8);
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8.val[i]);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4x2_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+ }
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += input_ptr_increment;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[0].val[i] =
+ vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), vget_low_s16(input_dup2.val[i]));
+ acc[1].val[i] =
+ vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), vget_high_s16(input_dup2.val[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 8, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input_s8[i] = vld1_s8(input_ptr + 8 * i);
+ }
+ input_ptr += 16;
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vmovl_s8(input_s8[i]);
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+ }
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0]));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input[0]));
+ acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1]));
+ acc[3] = vmlal_s16(acc[3], vget_high_s16(filter), vget_high_s16(input[1]));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 1 output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[2];
+ acc[0] = vld1q_s32(acc_buffer_ptr);
+ acc[1] = vld1q_s32(acc_buffer_ptr + 4);
+
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input));
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc[0]);
+ vst1q_s32(acc_buffer_ptr + 4, acc[1]);
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] =
+ vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), vget_low_s16(input_dup2.val[i]));
+ acc[2 * i + 1] =
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), vget_high_s16(input_dup2.val[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x4x2_t input_dup2 = vzip_s16(input, input);
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), input_dup2.val[0]);
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), input_dup2.val[1]);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 8>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr + 8 * i);
+ filter[i] = vmovl_s8(filter_s8);
+ }
+ int outp = 0;
+ // Handle two output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[8];
+ for (int i = 0; i < 8; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate.
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 0);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 1);
+ acc[4] = vmlal_lane_s16(acc[4], vget_low_s16(filter[0]), input, 2);
+ acc[5] = vmlal_lane_s16(acc[5], vget_high_s16(filter[0]), input, 2);
+ acc[6] = vmlal_lane_s16(acc[6], vget_low_s16(filter[1]), input, 3);
+ acc[7] = vmlal_lane_s16(acc[7], vget_high_s16(filter[1]), input, 3);
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 8; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 32;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += 2;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 0);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 1);
+
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input_dup2.val[0]));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input_dup2.val[0]));
+ acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input_dup2.val[1]));
+ acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input_dup2.val[1]));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += 2;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x4_t input_dup2 = vzip_s16(input, input).val[0];
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input_dup2);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input_s8[i] = vld1_s8(input_ptr + 8 * i);
+ }
+ input_ptr += 16;
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vmovl_s8(input_s8[i]);
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+ }
+
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input[0]));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input[0]));
+ acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input[1]));
+ acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input[1]));
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input));
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer.
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ // Handle 1 output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x2_t acc = vld1_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += 2;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vget_low_s32(vmlal_s16(vcombine_s32(acc, acc), filter, input));
+ // Store the accumulators back to acc_buffer.
+ vst1_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 1, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input_dup2.val[0]));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input_dup2.val[0]));
+ acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input_dup2.val[1]));
+ acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input_dup2.val[1]));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x2_t acc = vld1_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ const uint32_t input = *input_ptr++ + input_offset;
+
+ // Multiply-accumulate
+ acc = vget_low_s32(vmlal_n_s16(vcombine_s32(acc, acc), filter, input));
+ // Store the accumulators back to acc_buffer
+ vst1_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 1, 4>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[8];
+ for (int i = 0; i < 8; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], filter, vget_low_s16(input), 0);
+ acc[1] = vmlal_lane_s16(acc[1], filter, vget_low_s16(input), 1);
+ acc[2] = vmlal_lane_s16(acc[2], filter, vget_low_s16(input), 2);
+ acc[3] = vmlal_lane_s16(acc[3], filter, vget_low_s16(input), 3);
+ acc[4] = vmlal_lane_s16(acc[4], filter, vget_high_s16(input), 0);
+ acc[5] = vmlal_lane_s16(acc[5], filter, vget_high_s16(input), 1);
+ acc[6] = vmlal_lane_s16(acc[6], filter, vget_high_s16(input), 2);
+ acc[7] = vmlal_lane_s16(acc[7], filter, vget_high_s16(input), 3);
+
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 8; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 32;
+ }
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], filter, input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], filter, input, 1);
+ acc[2] = vmlal_lane_s16(acc[2], filter, input, 2);
+ acc[3] = vmlal_lane_s16(acc[3], filter, input, 3);
+
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ const uint32_t input = *input_ptr++ + input_offset;
+
+ // Multiply-accumulate
+ acc = vmlal_n_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const int8x8_t input_s8 = vld1_s8(input_ptr + 8 * i);
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ input[i] = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ }
+ input_ptr += 16;
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], filter, vget_low_s16(input[i]));
+ acc[2 * i + 1] = vmlal_s16(acc[2 * i + 1], filter, vget_high_s16(input[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc;
+ acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 4>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr + 8 * i);
+ filter[i] = vmovl_s8(filter_s8);
+ }
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[8];
+ for (int i = 0; i < 8; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), vget_low_s16(input), 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), vget_low_s16(input), 1);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), vget_low_s16(input), 2);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), vget_low_s16(input), 3);
+ acc[4] = vmlal_lane_s16(acc[4], vget_low_s16(filter[0]), vget_high_s16(input), 0);
+ acc[5] = vmlal_lane_s16(acc[5], vget_high_s16(filter[0]), vget_high_s16(input), 1);
+ acc[6] = vmlal_lane_s16(acc[6], vget_low_s16(filter[1]), vget_high_s16(input), 2);
+ acc[7] = vmlal_lane_s16(acc[7], vget_high_s16(filter[1]), vget_high_s16(input), 3);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 8; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 32;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 1);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 2);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 3);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 3>
+{
+ static void Run(int num_output_pixels, int input_depth, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // We will have to duplicate bytes in a NEON register, 3-fold.
+ // We will do that by register-level table-look-up using VTBL instructions.
+ // Here we prepare the registers containing the table-lookup indices.
+ static const int8_t dup3_indices_array[3][8] = {
+ {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}};
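+ // Illustratively, an 8-byte input {a,b,c,d,e,f,g,h} is expanded via these
+ // tables into {a,a,a,b,b,b,c,c}, {c,d,d,d,e,e,e,f} and {f,f,g,g,g,h,h,h}.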
+ int8x8_t dup3_indices[3];
+ for (int i = 0; i < 3; i++)
+ {
+ dup3_indices[i] = vld1_s8(dup3_indices_array[i]);
+ }
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const int8_t *local_filter_ptr = filter_ptr;
+ const int8_t *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters.
+ int16x8_t filter[3];
+ int8x8x3_t filter_s8;
+ filter_s8.val[0] = vld1_s8(local_filter_ptr);
+ filter_s8.val[1] = vld1_s8(local_filter_ptr + 8);
+ filter_s8.val[2] = vld1_s8(local_filter_ptr + 16);
+ local_filter_ptr += 24;
+ for (int i = 0; i < 3; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8.val[i]);
+ }
+ // Load the inputs, duplicate 3-fold, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(local_input_ptr);
+ local_input_ptr += 8;
+
+ int8x8_t input_s8_dup3[3];
+ for (int i = 0; i < 3; i++)
+ {
+ input_s8_dup3[i] = vtbl1_s8(input_s8, dup3_indices[i]);
+ }
+ int16x8_t input_dup3[3];
+ for (int i = 0; i < 3; i++)
+ {
+ const int16x8_t input_s16_dup3 = vmovl_s8(input_s8_dup3[i]);
+ input_dup3[i] = vaddq_s16(input_s16_dup3, vdupq_n_s16(input_offset));
+ }
+ // Load the accumulators from acc_buffer
+ int32x4x3_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+ acc[i].val[2] = vld1q_s32(acc_buffer_ptr + 4 * i + 16);
+ }
+ // Multiply-accumulate
+ for (int j = 0; j < 3; j++)
+ {
+ acc[0].val[j] =
+ vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), vget_low_s16(filter[j]));
+ acc[1].val[j] =
+ vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), vget_high_s16(filter[j]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 16, acc[i].val[2]);
+ }
+ acc_buffer_ptr += 24;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ for (int i = 0; i < 3; i++)
+ {
+ *acc_buffer_ptr++ += static_cast<int32_t>(local_filter_ptr[i]) * input_val;
+ }
+ local_filter_ptr += 3;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 2>
+{
+ static void Run(int num_output_pixels, int input_depth, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const int8_t *local_filter_ptr = filter_ptr;
+ const int8_t *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters.
+ int16x8_t filter[2];
+ int8x8x2_t filter_s8;
+ filter_s8.val[0] = vld1_s8(local_filter_ptr);
+ filter_s8.val[1] = vld1_s8(local_filter_ptr + 8);
+ local_filter_ptr += 16;
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8.val[i]);
+ }
+ // Load the inputs, add input_offset, duplicate 2-fold.
+ const int8x8_t input_s8 = vld1_s8(local_input_ptr);
+ local_input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Load the accumulators from acc_buffer.
+ int32x4x2_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+ }
+ // Multiply-accumulate.
+ for (int j = 0; j < 2; j++)
+ {
+ acc[0].val[j] =
+ vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), vget_low_s16(input_dup2.val[j]));
+ acc[1].val[j] =
+ vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), vget_high_s16(input_dup2.val[j]));
+ }
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ // Load the inputs.
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ for (int i = 0; i < 2; i++)
+ {
+ *acc_buffer_ptr++ += static_cast<int32_t>(local_filter_ptr[i]) * input_val;
+ }
+ local_filter_ptr += 2;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const int8_t *local_filter_ptr = filter_ptr;
+ const int8_t *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 16 input channels at a time.
+ for (; ic <= input_depth - 16; ic += 16)
+ {
+ // Load the filters.
+ int8x8_t filter_s8_0 = vld1_s8(local_filter_ptr + 8 * 0);
+ int8x8_t filter_s8_1 = vld1_s8(local_filter_ptr + 8 * 1);
+ local_filter_ptr += 16;
+ int16x8_t filter_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_1 = vmovl_s8(filter_s8_1);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8_0 = vld1_s8(local_input_ptr + 8 * 0);
+ int8x8_t input_s8_1 = vld1_s8(local_input_ptr + 8 * 1);
+ local_input_ptr += 16;
+ int16x8_t input_0 = vmovl_s8(input_s8_0);
+ int16x8_t input_1 = vmovl_s8(input_s8_1);
+ input_0 = vaddq_s16(input_0, vdupq_n_s16(input_offset));
+ input_1 = vaddq_s16(input_1, vdupq_n_s16(input_offset));
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+ int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+ acc_0 = vmlal_s16(acc_0, vget_low_s16(input_0), vget_low_s16(filter_0));
+ acc_1 = vmlal_s16(acc_1, vget_high_s16(input_0), vget_high_s16(filter_0));
+ acc_2 = vmlal_s16(acc_2, vget_low_s16(input_1), vget_low_s16(filter_1));
+ acc_3 = vmlal_s16(acc_3, vget_high_s16(input_1), vget_high_s16(filter_1));
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+ acc_buffer_ptr += 16;
+ }
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(local_filter_ptr);
+ local_filter_ptr += 8;
+ const int16x8_t filter = vmovl_s8(filter_s8);
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(local_input_ptr);
+ local_input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(input), vget_low_s16(filter));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(input), vget_high_s16(filter));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ const int16_t filter_val = *local_filter_ptr++;
+ *acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 16, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter_s8[i] = vld1_s8(filter_ptr + 8 * i);
+ }
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8[i]);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input_s8[i] = vld1_s8(input_ptr + 8 * i);
+ }
+ input_ptr += input_ptr_increment;
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vmovl_s8(input_s8[i]);
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+ }
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(input[i]), vget_low_s16(filter[i]));
+ acc[2 * i + 1] =
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), vget_high_s16(filter[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 8, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(input), vget_low_s16(filter));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(input), vget_high_s16(filter));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 16>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter_s8[i] = vld1_s8(filter_ptr + 8 * i);
+ }
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8[i]);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] = vmlal_n_s16(acc[2 * i + 0], vget_low_s16(filter[i]), input);
+ acc[2 * i + 1] = vmlal_n_s16(acc[2 * i + 1], vget_high_s16(filter[i]), input);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 32>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8_0 = vld1_s8(filter_ptr + 8 * 0);
+ int8x8_t filter_s8_1 = vld1_s8(filter_ptr + 8 * 1);
+ int8x8_t filter_s8_2 = vld1_s8(filter_ptr + 8 * 2);
+ int8x8_t filter_s8_3 = vld1_s8(filter_ptr + 8 * 3);
+ int16x8_t filter_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_1 = vmovl_s8(filter_s8_1);
+ int16x8_t filter_2 = vmovl_s8(filter_s8_2);
+ int16x8_t filter_3 = vmovl_s8(filter_s8_3);
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+ int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+ int32x4_t acc_4 = vld1q_s32(acc_buffer_ptr + 4 * 4);
+ int32x4_t acc_5 = vld1q_s32(acc_buffer_ptr + 4 * 5);
+ int32x4_t acc_6 = vld1q_s32(acc_buffer_ptr + 4 * 6);
+ int32x4_t acc_7 = vld1q_s32(acc_buffer_ptr + 4 * 7);
+ // Multiply-accumulate
+ acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input);
+ acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input);
+ acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input);
+ acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input);
+ acc_4 = vmlal_n_s16(acc_4, vget_low_s16(filter_2), input);
+ acc_5 = vmlal_n_s16(acc_5, vget_high_s16(filter_2), input);
+ acc_6 = vmlal_n_s16(acc_6, vget_low_s16(filter_3), input);
+ acc_7 = vmlal_n_s16(acc_7, vget_high_s16(filter_3), input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_s32(acc_buffer_ptr + 4 * 4, acc_4);
+ vst1q_s32(acc_buffer_ptr + 4 * 5, acc_5);
+ vst1q_s32(acc_buffer_ptr + 4 * 6, acc_6);
+ vst1q_s32(acc_buffer_ptr + 4 * 7, acc_7);
+ acc_buffer_ptr += 32;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 20>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ // NEON wants to load 8 bytes at a time, but 20 is not divisible by 8.
+ // We load the first 16 bytes into filter_s8_{0,1} as usual.
+ // Then we load the 8 last bytes into filter_s8_x (x for 'extra').
+ // This is redundant: the first 4 bytes of filter_s8_x are the same
+ // as the last 4 bytes of filter_s8_1.
+ int8x8_t filter_s8_0 = vld1_s8(filter_ptr + 8 * 0);
+ int8x8_t filter_s8_1 = vld1_s8(filter_ptr + 8 * 1);
+ int8x8_t filter_s8_x = vld1_s8(filter_ptr + 8 * 1 + 4);
+ int16x8_t filter_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_1 = vmovl_s8(filter_s8_1);
+ int16x8_t filter_x = vmovl_s8(filter_s8_x);
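+ // Byte layout (for reference): filter_0 covers bytes 0-7, filter_1 bytes
+ // 8-15 and filter_x bytes 12-19; only the upper half of filter_x
+ // (bytes 16-19) is consumed by the accumulation below.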
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+ int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+ int32x4_t acc_4 = vld1q_s32(acc_buffer_ptr + 4 * 4);
+ // Multiply-accumulate
+ acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input);
+ acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input);
+ acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input);
+ acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input);
+ acc_4 = vmlal_n_s16(acc_4, vget_high_s16(filter_x), input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_s32(acc_buffer_ptr + 4 * 4, acc_4);
+ acc_buffer_ptr += 20;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 8>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlal_n_s16(acc[0], vget_low_s16(filter), input);
+ acc[1] = vmlal_n_s16(acc[1], vget_high_s16(filter), input);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 2, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int16x4_t input_s16 = vdup_n_s16(0);
+ input_s16 = vset_lane_s16((reinterpret_cast<const int16_t *>(input_ptr))[0], input_s16, 0);
+ input_ptr += input_ptr_increment;
+ input_s16 = vset_lane_s16((reinterpret_cast<const int16_t *>(input_ptr))[0], input_s16, 1);
+ input_ptr += input_ptr_increment;
+ input_s16 = vget_low_s16(vmovl_s8(vreinterpret_s8_s16(input_s16)));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer.
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+
+ // Handle 1 output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x2_t acc = vld1_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += input_ptr_increment;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vget_low_s32(vmlal_s16(vcombine_s32(acc, acc), filter, input));
+ // Store the accumulators back to acc_buffer.
+ vst1_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 4, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ if (num_output_pixels <= 0)
+ {
+ return;
+ }
+
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+
+ // Handle one output pixel at a time, stopping at the second-to-last pixel:
+ // each iteration loads eight input values (vld1_s8) but only uses the first
+ // four, so the final pixel is handled separately to avoid reading past the
+ // end of the row.
+ for (; outp < num_output_pixels - 1; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc;
+ acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += input_ptr_increment;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+
+ // Handle the last output pixel.
+ // Load the accumulators from acc_buffer
+ int32x4_t acc;
+ acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 12, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8_0 = vld1_s8(filter_ptr);
+ int8x8_t filter_s8_1 = vld1_s8(filter_ptr + 4);
+ int16x8_t filter_s16_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_s16_1 = vmovl_s8(filter_s8_1);
+ int16x4_t filter_0 = vget_low_s16(filter_s16_0);
+ int16x4_t filter_1 = vget_high_s16(filter_s16_0);
+ int16x4_t filter_2 = vget_high_s16(filter_s16_1);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8_0 = vld1_s8(input_ptr);
+ int8x8_t input_s8_1 = vld1_s8(input_ptr + 4);
+ input_ptr += input_ptr_increment;
+ int16x8_t input_0 = vmovl_s8(input_s8_0);
+ int16x8_t input_1 = vmovl_s8(input_s8_1);
+ input_0 = vaddq_s16(input_0, vdupq_n_s16(input_offset));
+ input_1 = vaddq_s16(input_1, vdupq_n_s16(input_offset));
+
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+
+ // Multiply-accumulate
+ acc_0 = vmlal_s16(acc_0, vget_low_s16(input_0), filter_0);
+ acc_1 = vmlal_s16(acc_1, vget_high_s16(input_0), filter_1);
+ acc_2 = vmlal_s16(acc_2, vget_high_s16(input_1), filter_2);
+
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+
+ acc_buffer_ptr += 12;
+ }
+ }
+};
+#endif
+
+// Accumulates the effect of one row of the filter on a segment of one row of
+// the output, reading the corresponding row of the input.
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor, int input_depth,
+ int input_width, const int8_t *input_data, int16_t input_offset,
+ int pad_width, int depth_multiplier, int filter_width,
+ const int8_t *filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth, int32_t *acc_buffer)
+{
+ // Consistency check parameters. This is important in particular to ensure
+ // that we keep the number of template instantiations minimal, so we don't
+ // increase binary size unnecessarily.
+ static_assert(kFixedDepthMultiplier || !kFixedInputDepth, "");
+ static_assert(kFixedInputDepth || kAllowStrided, "");
+ assert(stride == 1 || kAllowStrided);
+ if (kFixedInputDepth)
+ {
+ assert(input_depth == kFixedInputDepth);
+ }
+ if (kFixedDepthMultiplier)
+ {
+ assert(depth_multiplier == kFixedDepthMultiplier);
+ }
+ assert(output_depth == input_depth * depth_multiplier);
+ const int input_ptr_increment = stride * input_depth;
+ const int8_t *filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ // For the current (filter_x, filter_y) point in the filter,
+ // compute the boundaries of the corresponding output row segment.
+ int out_x_loop_start_unclamped = 0;
+ int out_x_loop_end_unclamped = 0;
+ if (kAllowStrided)
+ {
+ if (stride == 2)
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 1) / 2;
+ out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 1) / 2;
+ }
+ else if (stride == 4)
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 3) / 4;
+ out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 3) / 4;
+ }
+ else
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + stride - 1) / stride;
+ out_x_loop_end_unclamped =
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
+ }
+ }
+ else
+ {
+ out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
+ out_x_loop_end_unclamped = pad_width + input_width - dilation_factor * filter_x;
+ }
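+ // Worked example (illustrative): stride 2, pad_width 1, dilation 1,
+ // filter_x 0, input_width 5 gives start = (1 + 1) / 2 = 1 and
+ // end = (1 + 5 + 1) / 2 = 3, i.e. out_x in {1, 2}, whose input positions
+ // out_x * 2 - 1 are 1 and 3, both inside [0, input_width).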
+ // The kernel will have to iterate over the segment of the
+ // output row that starts at out_x_loop_start and ends at out_x_loop_end.
+ const int out_x_loop_start = std::max(out_x_buffer_start, out_x_loop_start_unclamped);
+ const int out_x_loop_end = std::min(out_x_buffer_end, out_x_loop_end_unclamped);
+
+ int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const int8_t *input_ptr = input_data + in_x_origin * input_depth;
+ const int num_output_pixels = out_x_loop_end - out_x_loop_start;
+ QuantizedDepthwiseConvKernel<kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier>::Run(
+ num_output_pixels, input_depth, depth_multiplier, input_ptr, input_offset,
+ input_ptr_increment, filter_base_ptr, acc_buffer_ptr);
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Generic fallback of QuantizedDepthwiseConvAccumRow: portable, non-templatized.
+inline void QuantizedDepthwiseConvAccumRowGeneric(int stride, int dilation_factor, int input_depth,
+ int input_width, const int8_t *input_data,
+ int16_t input_offset, int pad_width,
+ int depth_multiplier, int filter_width,
+ const int8_t *filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth,
+ int32_t *acc_buffer)
+{
+ const int8_t *filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int out_x_loop_start =
+ std::max(out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+ const int out_x_loop_end =
+ std::min(out_x_buffer_end,
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride);
+
+ int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const int8_t *input_ptr = input_data + in_x_origin * input_depth;
+ const int input_ptr_increment = (stride - 1) * input_depth;
+ for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++)
+ {
+ const int8_t *filter_ptr = filter_base_ptr;
+ for (int ic = 0; ic < input_depth; ++ic)
+ {
+ const int16_t input_val = *input_ptr++ + input_offset;
+ for (int m = 0; m < depth_multiplier; m++)
+ {
+ const int16_t filter_val = *filter_ptr++;
+ *acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
+ }
+ }
+ input_ptr += input_ptr_increment;
+ }
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Initializes the accumulator buffer with bias values.
+inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
+ const int32_t *bias_data, int32_t *acc_buffer)
+{
+ int i = 0;
+#ifdef USE_NEON
+ if (output_depth == 1)
+ {
+ const int32x4_t b = vdupq_n_s32(bias_data[0]);
+ for (; i <= num_output_pixels - 16; i += 16)
+ {
+ vst1q_s32(acc_buffer + i + 0, b);
+ vst1q_s32(acc_buffer + i + 4, b);
+ vst1q_s32(acc_buffer + i + 8, b);
+ vst1q_s32(acc_buffer + i + 12, b);
+ }
+ for (; i <= num_output_pixels - 4; i += 4)
+ {
+ vst1q_s32(acc_buffer + i, b);
+ }
+ }
+ else if (output_depth == 2)
+ {
+ int32x4_t b = vdupq_n_s32(bias_data[0]);
+ b = vsetq_lane_s32(bias_data[1], b, 1);
+ b = vsetq_lane_s32(bias_data[1], b, 3);
+ for (; i <= num_output_pixels - 8; i += 8)
+ {
+ vst1q_s32(acc_buffer + 2 * i + 0, b);
+ vst1q_s32(acc_buffer + 2 * i + 4, b);
+ vst1q_s32(acc_buffer + 2 * i + 8, b);
+ vst1q_s32(acc_buffer + 2 * i + 12, b);
+ }
+ for (; i <= num_output_pixels - 2; i += 2)
+ {
+ vst1q_s32(acc_buffer + 2 * i, b);
+ }
+ }
+ else if (output_depth == 4)
+ {
+ const int32x4_t b = vld1q_s32(bias_data);
+ for (; i <= num_output_pixels - 4; i += 4)
+ {
+ vst1q_s32(acc_buffer + 4 * i + 0, b);
+ vst1q_s32(acc_buffer + 4 * i + 4, b);
+ vst1q_s32(acc_buffer + 4 * i + 8, b);
+ vst1q_s32(acc_buffer + 4 * i + 12, b);
+ }
+ for (; i < num_output_pixels; i++)
+ {
+ vst1q_s32(acc_buffer + 4 * i, b);
+ }
+ }
+ else if (output_depth == 8)
+ {
+ const int32x4_t b0 = vld1q_s32(bias_data);
+ const int32x4_t b1 = vld1q_s32(bias_data + 4);
+ for (; i <= num_output_pixels - 2; i += 2)
+ {
+ vst1q_s32(acc_buffer + 8 * i + 0, b0);
+ vst1q_s32(acc_buffer + 8 * i + 4, b1);
+ vst1q_s32(acc_buffer + 8 * i + 8, b0);
+ vst1q_s32(acc_buffer + 8 * i + 12, b1);
+ }
+ for (; i < num_output_pixels; i++)
+ {
+ vst1q_s32(acc_buffer + 8 * i + 0, b0);
+ vst1q_s32(acc_buffer + 8 * i + 4, b1);
+ }
+ }
+ else if (output_depth == 16)
+ {
+ const int32x4_t b0 = vld1q_s32(bias_data);
+ const int32x4_t b1 = vld1q_s32(bias_data + 4);
+ const int32x4_t b2 = vld1q_s32(bias_data + 8);
+ const int32x4_t b3 = vld1q_s32(bias_data + 12);
+ for (; i < num_output_pixels; i++)
+ {
+ vst1q_s32(acc_buffer + 16 * i + 0, b0);
+ vst1q_s32(acc_buffer + 16 * i + 4, b1);
+ vst1q_s32(acc_buffer + 16 * i + 8, b2);
+ vst1q_s32(acc_buffer + 16 * i + 12, b3);
+ }
+ }
+#endif
+ for (; i < num_output_pixels; i++)
+ {
+ memcpy(acc_buffer + i * output_depth, bias_data, sizeof(acc_buffer[0]) * output_depth);
+ }
+}
+
+inline void DepthwiseConvGeneral(const DepthwiseConvParams ¶ms,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data,
+ const Shape & /* bias_shape */, const int32_t *bias_data,
+ const Shape &output_shape, int8_t *output_data, int thread_start,
+ int thread_end, int thread_dim)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+ const int32_t input_offset = params.input_offset;
+ const int32_t output_offset = params.output_offset;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_rows = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+
+ static const int kAccBufferMaxSize = 2048;
+ int32_t acc_buffer[kAccBufferMaxSize];
+ assert(kAccBufferMaxSize >= output_depth);
+ const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth;
+ const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth;
+ UNUSED_RELEASE(kAccBufferActualSize);
+ assert(kOutputPixelsInAccBuffer * output_depth <= kAccBufferActualSize);
+ assert(kAccBufferActualSize <= kAccBufferMaxSize);
+ assert(kOutputPixelsInAccBuffer >= 1);
+ assert(thread_dim == 0 || thread_dim == 1);
+
+ // row_accum_func will point to the core accumulation function to be used
+ // for this DepthwiseConv op.
+ using row_accum_func_t = decltype(&QuantizedDepthwiseConvAccumRowGeneric);
+ row_accum_func_t row_accum_func = nullptr;
+
+#define TFMINI_USE_DEPTHWISECONV_KERNEL(ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER) \
+ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \
+ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \
+ depth_multiplier == FIXED_DEPTH_MULTIPLIER) \
+ { \
+ row_accum_func = \
+ QuantizedDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER>; \
+ }
+
+#ifdef USE_NEON
+ // We go over our list of kernels by decreasing order of preference
+ // for the cases where multiple kernels could apply.
+
+ // Start with the fastest kernels: AllowStrided=false, fixed input depth.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 12, 1)
+
+ // Next come the strided kernels: AllowStrided=true, fixed input depth.
+ // They are a bit less efficient, but allow stride!=1.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 16, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 16)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 20)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1)
+
+ // Finally, the kernels allowing a variable input depth,
+ // these are the least efficient but most general kernels.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 3)
+#endif // USE_NEON
+
+ // No matching fast kernel found, use slow fallback.
+ if (!row_accum_func)
+ {
+ row_accum_func = QuantizedDepthwiseConvAccumRowGeneric;
+ }
+
+#undef TFMINI_USE_DEPTHWISECONV_KERNEL
+
+ const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+ const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+ const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
+ // Now that we have determined row_accum_func, we can start work.
+ int batch_start = 0;
+ int batch_end = batches;
+ int row_start = 0;
+ int row_end = output_rows;
+ int output_ptr_offset = 0;
+
+ switch (thread_dim)
+ {
+ case 0:
+ assert(thread_start >= 0);
+ assert(thread_end <= batches);
+ batch_start = thread_start;
+ batch_end = thread_end;
+ output_ptr_offset = batch_start * FlatSizeSkipDim(output_shape, 0);
+ break;
+ case 1:
+ assert(thread_start >= 0);
+ assert(thread_end <= output_rows);
+ row_start = thread_start;
+ row_end = thread_end;
+ output_ptr_offset = row_start * output_width * output_depth;
+ break;
+ }
+
+ int8_t *output_ptr = output_data + output_ptr_offset;
+ int batch_step = (output_rows + row_start - row_end) * output_width * output_depth;
+ for (int b = batch_start; b < batch_end; ++b)
+ {
+ for (int out_y = row_start; out_y < row_end; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ const int filter_y_start =
+ std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor);
+ const int filter_y_end =
+ std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) /
+ dilation_height_factor);
+ for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
+ out_x_buffer_start += kOutputPixelsInAccBuffer)
+ {
+ const int out_x_buffer_end =
+ std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
+ // We call a 'pixel' a group of activations that share all but the
+ // 'depth'/'channel' coordinate. num_output_pixels is the number of
+ // output pixels that we will accumulate in this loop iteration.
+ const int num_output_pixels = out_x_buffer_end - out_x_buffer_start;
+ // Initialize our local accumulator with the bias values, so we don't
+ // have to add them later.
+ DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, bias_data, acc_buffer);
+ // Accumulation loop. Most of the time should be spent in here.
+ for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ row_accum_func(stride_width, dilation_width_factor, input_depth, input_width,
+ input_data + in_y * input_height_stride + b * input_batch_stride,
+ input_offset, pad_width, depth_multiplier, filter_width,
+ filter_data + filter_y * filter_height_stride, out_x_buffer_start,
+ out_x_buffer_end, output_depth, acc_buffer);
+ }
+ // Finished accumulating int32_t values. Now need to convert them to
+ // the final 8bit form and store them.
+ const int num_output_values = output_depth * num_output_pixels;
+
+ Quantize(output_multiplier, output_shift, output_depth, num_output_values, output_offset,
+ output_activation_min, output_activation_max, acc_buffer, output_ptr);
+
+ output_ptr += num_output_values;
+ }
+ }
+ output_ptr += batch_step;
+ }
+}
+
+} // namespace depthwise_conv
+
+template <DepthwiseConvOutputRounding kOutputRounding>
+inline void DepthwiseConvWithRounding(const DepthwiseConvParams ¶ms,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data,
+ const Shape &bias_shape, const int32_t *bias_data,
+ const Shape &output_shape, int8_t *output_data,
+ int thread_start, int thread_end, int thread_dim)
+{
+ const int depth_multiplier = params.depth_multiplier;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ UNUSED_RELEASE(depth_multiplier);
+ UNUSED_RELEASE(dilation_width_factor);
+ UNUSED_RELEASE(dilation_height_factor);
+ assert(dilation_width_factor >= 1);
+ assert(dilation_height_factor >= 1);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_depth = input_shape.Dims(3);
+ UNUSED_RELEASE(output_depth);
+ UNUSED_RELEASE(input_depth);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_shape.FlatSize() == output_depth);
+
+// TODO Use the code below
+#if 0
+// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
+// Jetson TX-2. This compiler does not support the offsetof() macro.
+#if defined(__aarch64__) && !defined(GOOGLE_L4T)
+#if defined(__ANDROID__) && defined(__clang__)
+ CpuFlags cpu_flags;
+ GetCpuFlags(&cpu_flags);
+ const bool has_dot_product_instructions = cpu_flags.neon_dotprod;
+
+ // Dispatch to dot-product 3x3 kernels when supported.
+ if (has_dot_product_instructions)
+ {
+ using optimized_ops::depthwise_conv::DotProduct3x3KernelType;
+ DotProduct3x3KernelType kernel_type = optimized_ops::depthwise_conv::CategorizeDotProductKernel<
+ optimized_ops::depthwise_conv::QuantizationType::kPerChannelInt8>(
+ input_shape, filter_shape, output_shape, params, output_shift);
+ if (kernel_type != DotProduct3x3KernelType::kNone)
+ {
+ DepthwiseConvParams params_copy = params;
+ params_copy.output_shift_per_channel = output_shift;
+ params_copy.output_multiplier_per_channel = output_multiplier;
+ optimized_ops::depthwise_conv::DepthwiseConvDotProduct3x3PerChannel<
+ DepthwiseConvImplementation::kUseNeon3x3DotProduct>(
+ params_copy, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data, thread_start, thread_end, thread_dim);
+ return;
+ }
+ }
+
+#endif
+ // Dispatch to non-dot-product 3x3 kernels when supported.
+
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+
+ // Call kernel optimized for depthwise convolutions using 3x3 filters if
+ // parameters are supported.
+ if (optimized_ops::depthwise_conv::Fast3x3FilterKernelSupported<
+ optimized_ops::depthwise_conv::QuantizationType::kPerChannelInt8>(
+ input_shape, filter_shape, stride_width, stride_height, dilation_width_factor,
+ dilation_height_factor, pad_width, pad_height, depth_multiplier, output_shape, 0,
+ output_shift))
+ {
+ optimized_ops::depthwise_conv::DepthwiseConv3x3FilterPerChannel<
+ DepthwiseConvOutputRounding::kUpward>(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+ return;
+ }
+#endif
+
+#endif /* end of if 0 */
+
+ depthwise_conv::DepthwiseConvGeneral(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+}
+
+inline void DepthwiseConvImpl(const DepthwiseConvParams ¶ms, const int32_t *output_multiplier,
+ const int32_t *output_shift, const Shape &input_shape,
+ const int8_t *input_data, const Shape &filter_shape,
+ const int8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ int8_t *output_data, int thread_start, int thread_end, int thread_dim)
+{
+ return DepthwiseConvWithRounding<DepthwiseConvOutputRounding::kAwayFromZero>(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+}
+
+template <typename T, typename TS> struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task
+{
+ DepthwiseConvWorkerTask(const DepthwiseConvParams ¶ms, const int32_t *output_multiplier,
+ const int32_t *output_shift, const Shape &input_shape,
+ const T *input_data, const Shape &filter_shape, const T *filter_data,
+ const Shape &bias_shape, const TS *bias_data, const Shape &output_shape,
+ T *output_data, int thread_start, int thread_end, int thread_dim)
+ : params_(params), output_multiplier_(output_multiplier), output_shift_(output_shift),
+ input_shape_(input_shape), input_data_(input_data), filter_shape_(filter_shape),
+ filter_data_(filter_data), bias_shape_(bias_shape), bias_data_(bias_data),
+ output_shape_(output_shape), output_data_(output_data), thread_start_(thread_start),
+ thread_end_(thread_end), thread_dim_(thread_dim)
+ {
+ }
+
+ void Run() override
+ {
+ DepthwiseConvImpl(params_, output_multiplier_, output_shift_, input_shape_, input_data_,
+ filter_shape_, filter_data_, bias_shape_, bias_data_, output_shape_,
+ output_data_, thread_start_, thread_end_, thread_dim_);
+ }
+
+private:
+ const DepthwiseConvParams ¶ms_;
+ const int32_t *output_multiplier_;
+ const int32_t *output_shift_;
+ const Shape &input_shape_;
+ const T *input_data_;
+ const Shape &filter_shape_;
+ const T *filter_data_;
+ const Shape &bias_shape_;
+ const TS *bias_data_;
+ const Shape &output_shape_;
+ T *output_data_;
+ int thread_start_;
+ int thread_end_;
+ int thread_dim_;
+};
+
+inline int HowManyConvThreads(const Shape &output_shape, const Shape &filter_shape, int thread_dim)
+{
+ constexpr int kMinMulPerThread = 8;
+ const int output_units = output_shape.Dims(thread_dim);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int num_mul_per_unit =
+ FlatSizeSkipDim(output_shape, thread_dim) * filter_height * filter_width;
+ const int min_units_per_thread = kMinMulPerThread / num_mul_per_unit + 1;
+ int thread_count = output_units / min_units_per_thread;
+ return thread_count;
+}
+
+inline void DepthwiseConvPerChannel(const DepthwiseConvParams ¶ms,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data,
+ const Shape &bias_shape, const int32_t *bias_data,
+ const Shape &output_shape, int8_t *output_data,
+ ruy::Context *ruy_context)
+{
+ UNUSED_ALL(params, output_multiplier, output_shift, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape, output_data, ruy_context);
+
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int output_batches = output_shape.Dims(0);
+ const int output_rows = output_shape.Dims(1);
+ int thread_count_batch = HowManyConvThreads(output_shape, filter_shape, 0);
+ int thread_count_row = HowManyConvThreads(output_shape, filter_shape, 1);
+ int thread_dim, thread_count, thread_dim_size;
+ if (thread_count_batch > thread_count_row)
+ {
+ thread_dim = 0;
+ thread_dim_size = output_batches;
+ thread_count = thread_count_batch;
+ }
+ else
+ {
+ thread_dim = 1;
+ thread_dim_size = output_rows;
+ thread_count = thread_count_row;
+ }
+
+ // NOTE Borrow RuyContext to get max_num_threads setting
+ // TODO Define and use max_num_threads for CPU backend
+ const int max_threads = ruy_context->max_num_threads();
+ thread_count = std::max(1, std::min(thread_count, max_threads));
+
+ if (thread_count == 1)
+ {
+ DepthwiseConvImpl(params, output_multiplier, output_shift, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data,
+ /*thread_start=*/0,
+ /*thread_end=*/output_rows, /*thread_dim=*/1);
+ }
+ else
+ {
+ std::vector<DepthwiseConvWorkerTask<int8_t, int32_t>> tasks;
+ // TODO(b/131746020) don't create new heap allocations every time.
+ // At least we make it a single heap allocation by using reserve().
+ tasks.reserve(thread_count);
+ int thread_start = 0;
+ for (int i = 0; i < thread_count; ++i)
+ {
+ int thread_end = thread_start + (thread_dim_size - thread_start) / (thread_count - i);
+ tasks.emplace_back(params, output_multiplier, output_shift, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data, output_shape,
+ output_data, thread_start, thread_end, thread_dim);
+ thread_start = thread_end;
+ }
+ cpu_backend_threadpool::Execute(tasks.size(), tasks.data(), ruy_context);
+ }
+}
+
+} // namespace optimized_integer_ops
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_INT8_H__
}
template <typename T>
-inline void BroadcastBinaryArithmeticOpSlowQuant8(
+inline typename std::enable_if_t<is_quant8<T>::value> BroadcastBinaryArithmeticOpSlow(
const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data,
const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data,
const std::function<T(const BinaryArithmeticOpParam ¶ms, const T &, const T &)> &fn)
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
- if ((params.quantized_activation_min < 0) && (params.quantized_activation_max > 255))
- {
- throw std::runtime_error{"Support only for Quant8."};
- }
-
// Comment from tensorflow lite:
//
// In Tensorflow, the dimensions are canonically named (batch_number, row,
{
for (int c = 0; c < extended_output_shape.Dims(3); ++c)
{
- output_data[Offset(extended_output_shape, b, y, x, c)] =
- ActivationFunctionWithMinMax<uint8_t>(
- fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
- input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
- params.quantized_activation_min, params.quantized_activation_max);
+ output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
+ fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
+ params.quantized_activation_min, params.quantized_activation_max);
}
}
}
}
}
+inline void Conv(const ConvParams ¶ms, const int32_t *output_multiplier,
+ const int32_t *output_shift, const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
+{
+ UNUSED_RELEASE(bias_shape);
+ // Get parameters.
+ const int32_t input_offset = params.input_offset; // r = s(q - Z)
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int32_t output_offset = params.output_offset;
+
+ // Set min and max value of the output.
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+
+ // Consistency check.
+ assert(output_activation_min < output_activation_max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ // Check dimensions of the tensors.
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+
+ if (!is_point_inside_image)
+ {
+ continue;
+ }
+
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ int32_t input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ int32_t filter_val =
+ filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
+ // Accumulate with a 32-bit accumulator.
+ // In the nudging process during model quantization, we force
+ // the real value of 0.0 to be represented by a quantized value. This
+ // guarantees that the input_offset is an int8_t, even though
+ // it is represented using int32_t. int32_t += int8_t *
+ // (int8_t - int8_t) so the highest value we can get from each
+ // accumulation is [-127, 127] * ([-128, 127] -
+ // [-128, 127]), which is [-32512, 32512]. log2(32512)
+ // = 14.98, which means we can accumulate at least 2^16
+ // multiplications without overflow. The accumulator is
+ // applied to a filter so the accumulation logic will hold as
+ // long as the filter size (filter_y * filter_x * in_channel)
+ // does not exceed 2^16, which is the case in all the models
+ // we have seen so far.
+ // TODO(jianlijianli): Add a check to make sure the
+ // accumulator depth is smaller than 2^16.
+ acc += filter_val * (input_val + input_offset);
+ }
+ }
+ }
+
+ if (bias_data)
+ {
+ acc += bias_data[out_channel];
+ }
+ acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_channel],
+ output_shift[out_channel]);
+ acc += output_offset;
+ acc = std::max(acc, output_activation_min);
+ acc = std::min(acc, output_activation_max);
+ output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+ static_cast<int8_t>(acc);
+ }
+ }
+ }
+ }
+}
+
} // namespace reference
} // namespace cker
} // namespace nnfw
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.12.0'
+release = '1.15.0'
# -- General configuration ---------------------------------------------------
http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/
```
-$ wget http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/libarmcl-v20.05-17.5.aarch64.rpm
+$ wget http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/libarmcl-v21.02-17.5.aarch64.rpm
$ wget http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/libhdf5-101-1.10.1-3.85.aarch64.rpm
(4) Copy to device
```
-$ sdb push libarmcl-v20.05-17.5.aarch64.rpm /opt/usr/home/owner/share/tmp/
+$ sdb push libarmcl-v21.02-17.5.aarch64.rpm /opt/usr/home/owner/share/tmp/
$ sdb push libhdf5-101-1.10.1-3.85.aarch64.rpm /opt/usr/home/owner/share/tmp/
$ sdb push libhdf5_cpp101-1.10.1-3.85.aarch64.rpm /opt/usr/home/owner/share/tmp/
```
```
sh-3.2# cd /opt/usr/home/owner/share/tmp/
-sh-3.2# rpm -i libarmcl-v20.05-17.5.aarch64.rpm
+sh-3.2# rpm -i libarmcl-v21.02-17.5.aarch64.rpm
sh-3.2# rpm -i libhdf5-101-1.10.1-3.85.aarch64.rpm
sh-3.2# rpm -i libhdf5_cpp101-1.10.1-3.85.aarch64.rpm
```
├── bin
│  ├── nnapi_test
│  ├── nnpackage_run
-│  ├── tflite_loader_test_tool
+│  ├── tflite_comparator
│  └── tflite_run
├── include
│  ├── nnfw
$ sudo ./tools/cross/install_rootfs.sh aarch64
```
- supports `arm`(default) and `aarch64` architecture for now
-- supports `xenial`(default), `trusty` and `bionic` release
+- supports `bionic`(default) and `focal` release
To see the options,
```
***\* CAUTION: The OS version of rootfs must match the OS version of execution target device. On the other hand, you need to match the Ubuntu version of the development PC with the Ubuntu version of rootfs to be used for cross-build. Otherwise, unexpected build errors may occur.***
-If you are using Ubuntu 16.04 LTS, select `xenial`, if you are using Ubuntu 18.04 LTS, select `bionic`. You can check your Ubuntu code name in the following way.
+If you are using Ubuntu 18.04 LTS, select `bionic`, if you are using Ubuntu 20.04 LTS, select `focal`. You can check your Ubuntu code name in the following way.
```
$ cat /etc/lsb-release
Use `ROOTFS_DIR` to a full path to prepare at alternative path.
```
-$ ROOTFS_DIR=/home/user/rootfs/aarch64-xenial sudo -E ./tools/cross/install_rootfs.sh aarch64
+$ ROOTFS_DIR=/home/user/rootfs/aarch64-bionic sudo -E ./tools/cross/install_rootfs.sh aarch64
```
### Using proxy
$ sudo ./tools/cross/install_rootfs.sh arm
```
- supports `arm`(default) and `aarch64` architecture for now
-- supports `bionic`(default), `trusty`, `xenial` and `focal` release
+- supports `bionic`(default) and `focal` release
To see the options,
```
***\* CAUTION: The OS version of rootfs must match the OS version of execution target device. On the other hand, you need to match the Ubuntu version of the development PC with the Ubuntu version of rootfs to be used for cross-build. Otherwise, unexpected build errors may occur.***
-If you are using Ubuntu 16.04 LTS, select `xenial`, if you are using Ubuntu 18.04 LTS, select `bionic`. You can check your Ubuntu code name in the following way.
+If you are using Ubuntu 18.04 LTS, select `bionic`, if you are using Ubuntu 20.04 LTS, select `focal`. You can check your Ubuntu code name in the following way.
```
$ cat /etc/lsb-release
Use `ROOTFS_DIR` to a full path to prepare at alternative path.
```
-$ ROOTFS_DIR=/home/user/rootfs/arm-xenial sudo -E ./tools/cross/install_rootfs.sh arm
+$ ROOTFS_DIR=/home/user/rootfs/arm-bionic sudo -E ./tools/cross/install_rootfs.sh arm
```
### Using proxy
--slave /usr/bin/arm-linux-gnueabihf-gcov arm-linux-gnueabihf-gcov /usr/bin/arm-linux-gnueabihf-gcov-8
```
-### Ubuntu 16.04 LTS
+### Ubuntu 20.04 LTS
-On Ubuntu 16.04 or older, follow the next steps:
+Same as Ubuntu 18.04 LTS, except for the g++ version.
-```
-$ cd ~/your/path
-$ wget https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11/arm-linux-gnueabihf/gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf.tar.xz
-$ tar xvf gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf.tar.xz
-$ echo 'export PATH=~/your/path/gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf/bin:$PATH' >> ~/.bashrc
-```
+## Build and install ARM Compute Library
-Make sure you get `libstdc++.so` updated on your target with your new toolchain's corresponding one.
+In most cases you only need to build ACL (ARM Compute Library) once.
-For example, if you installed gcc-linaro-7.2.1-2017.11 above, do
+To build ACL, you need to install scons:
```
-$ wget https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11/arm-linux-gnueabihf/runtime-gcc-linaro-7.2.1-2017.11-arm-linux-gnueabihf.tar.xz
-$ tar xvf runtime-gcc-linaro-7.2.1-2017.11-arm-linux-gnueabihf.tar.xz
+$ sudo apt-get install scons
```
-Then, copy `libstdc++.so.6.0.24` into `/usr/lib/arm-linux-gnueabihf`, and update symbolic links on your device.
-
-## Build and install ARM Compute Library
-
-Mostly you only need once of ACL build.
-
ACL will be automatically installed in `externals/acl` when you build the runtime without any changes.
You can check the ACL source information in `infra/cmake/packages/ARMComputeSourceConfig.cmake`; an excerpt is shown below.
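For reference, the lines below are taken from the CMake change included in this patch, which pins ACL to v21.02. They are only an excerpt of `ARMComputeSourceConfig.cmake`, not the whole file.
```
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v21.02.tar.gz)
ExternalSource_Download(ARMCOMPUTE ${ARMCOMPUTE_URL})
set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
```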
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Wed Jan 14 16:48:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.13.0.md
--- /dev/null
+# Release Note 1.13.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- Add optimization passes: ConvertNCHWToNHWC, FoldSparseToDensePass, FuseBatchNormWithConvPass, ForwardReshapeToUnaryOpPass, RemoveUnnecessarySlicePass, RemoveUnnecessarySplitPass, RemoveUnnecessaryReshapePass, RemoveRedundantReshape, SubstituteTransposeToReshapePass, SubstituteSqueezeToReshapePass
+- Support more operators: FAKE_QUANT
+- Enhancements: Support auto generated random input for record-minmax (for better quantization testing)
+- Changes: the `--all` option is renamed to `--O1` in circle2circle (and one-optimize)
+- Fixes: `tf2tfliteV2` accepts input shapes with the `--v2` option; lots of fixes to increase test coverage
+- Experimental: Compile ONNX models to circle
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu Mar 18 16:47:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.14.0.md
--- /dev/null
+# Release Note 1.14.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- `one-codegen` interface now distinguishes own arguments from backend's.
+- Adds `RemoveUnnecessaryStridedSlice` optimization pass.
+- Introduces experimental support for generating profile data.
+ - Adds `--generate_profile_data` option to `one-optimize`, `one-quantize`.
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu Mar 18 16:47:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.15.0.md
--- /dev/null
+# Release Note 1.15.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- Support more Ops for quantization
+- Fix `record-minmax` tool for bool type, NaN values
+- Fix `one-cmds` test scripts
+- Remove `stdex` module
+- `arser` supports short option
+
+
+## ONE Runtime
+
+### Runtime backend supports more operations and types
+
+- CPU backend
+ - Add: int8
+ - AvgPool2d: int8
+ - Conv2D: int8
+ - DepthwiseConv2D: int8
+ - Div: uint8
+ - Elu: float
+ - ExpandDims: int8
+ - LogicalAnd: boolean
+ - Maximum: uint8
+ - MaxPool2D: int8
+ - Minimum: uint8
+ - Mul: int8
+ - Pad: int8
+ - PadV2: int8
+ - Quantize: uint8, int8
+ - Reshape: int8
+ - ResizeBilinear: int8
+ - Softmax: int8
+ - Squeeze: int8
+ - Sub: int8
+
+### ARM Compute Library Update
+
+- ONERT uses Compute Library v21.02
./1.10/index
./1.11/index
./1.12/index
+ ./1.13/index
The major classes are described below. One must implement these classes (and some more) to create a backend.
- `Backend` : Responsible to create a backend context which is a set of backend components
-- `IConfig` : Configurations and miscellaneous stuff
+- `BackendContext` : Holds data for the current session and is also responsible for creating tensor objects and kernels
+ - `BackendContext::genTensors` : Create tensor objects
+ - `BackendContext::genKernels` : Create kernels
+- `IConfig` : Configurations and miscellaneous stuff (not session based, global)
- `ITensorRegistry` : A set of tensor(`ITensor`) objects that are used by the current backend
-- `ITensorBuilder` : Make tensor object and register it to `ITensorRegistry` and static tensors
-- `IKernelGenerator` : Generates operation kernels
Please refer to each class document for details. You may refer to [Bundle Backends](#bundle-backends) for actual implementation samples.
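For orientation, the sketch below shows roughly how these classes relate to one another. It is an illustrative stand-in, not the actual onert API: the real interfaces take graph/session arguments and return richer types, so treat the `My*` names and all signatures as hypothetical and use the bundle backends as the authoritative reference.
```
#include <memory>

// Illustrative sketch only: the stand-in base classes below exist just to show
// how the classes listed above relate to each other.
struct ITensorRegistry
{
  // set of ITensor objects used by this backend
};

struct IConfig
{
  // global (not session-based) configuration
};

struct BackendContext
{
  virtual ~BackendContext() = default;
  virtual void genTensors() = 0; // create tensor objects and register them
  virtual void genKernels() = 0; // create kernels for the assigned operations
};

struct Backend
{
  virtual ~Backend() = default;
  virtual std::shared_ptr<IConfig> config() const = 0;
  // hypothetical, simplified signature; the real one receives session data
  virtual std::unique_ptr<BackendContext> newContext() const = 0;
};

// A hypothetical backend: its context owns a tensor registry and builds kernels
// that read from / write to the registered tensors.
struct MyBackendContext : BackendContext
{
  void genTensors() override { /* create tensors and register them to _registry */ }
  void genKernels() override { /* build kernels using tensors from _registry */ }
  std::shared_ptr<ITensorRegistry> _registry = std::make_shared<ITensorRegistry>();
};

struct MyBackend : Backend
{
  std::shared_ptr<IConfig> config() const override { return std::make_shared<IConfig>(); }
  std::unique_ptr<BackendContext> newContext() const override
  {
    return std::make_unique<MyBackendContext>();
  }
};
```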
## Provided Backend Implementations
-We provide some backends along with the runtime. There is the special backend `controlflow` which is part of runtime core, and some bundle backends which are baseline backends and samples of backend implementation.
+We provide some backends along with the runtime. There is the special backend `builtin`, which is part of the runtime core, and some bundle backends, which are baseline backends and samples of backend implementation.
-## `controlflow` Backend
+## `builtin` Backend
-`controlflow` is a special backend that is always loaded(statically linked, part of runtime core). It is implemented just like other backends, but there are some things that it does exclusively.
+`builtin` is a special backend that is always loaded (statically linked, part of the runtime core). It is implemented just like other backends, but there are some things that it does exclusively.
- Has kernels for If, While and Permute operations (kernels from other backends are never used)
-- The runtime core directly creates `controlflow`'s tensor objects to accept user-given input and output buffers
-- The runtime core gives the executor context to `controlflow` backend which allows control flow ops can change execution flow properly
+- The runtime core directly creates `builtin`'s tensor objects to accept user-given input and output buffers
+- The runtime core gives the executor context to the `builtin` backend, which allows control flow ops to change the execution flow properly
## Bundle Backends
# Supported Operations and backend
-As of 2020-12-07
+As of 2021-03-08
### Raw-data format (float32, int32, boolean, etc)
AvgPool2D | O | O | O
BatchMatmul | O | |
BatchToSpaceND | O | O | O
+BroadcastTo | O | |
Cast | O | O | O
Concat | O | O | O
Conv2D | O | O | O
DepthToSpace | O | O | O
DepthwiseConv2D | O | O | O
Div | O | O | O
+Einsum | O | |
+Elu | O | |
EmbeddingLookup | | O | O
Equal | O | O | O
Exp | O | O | O
-ExpandDims | O | |
+ExpandDims | O | O | O
Fill | O | |
Floor | O | O | O
FullyConnected | O | O | O
+FusedBatchNorm | O | |
Gather | O | O | O
Greater | O | O | O
GreaterEqual | O | O | O
LessEqual | O | O | O
LocalResponseNormalize | | O | O
Log | O | |
-LogicalAnd | | O | O
+LogicalAnd | O | O | O
LogicalNot | O | O | O
LogicalOr | O | O | O
Logistic | O | O | O
LogSoftmax | O | |
-LSHProjection | | |
LSTM | | O | O
+MatrixBandPart | O | |
Maximum | O | O | O
MaxPool2D | O | O | O
Mean | O | O | O
Quantize | O | |
Range | O | |
Rank | O | |
+ReduceAny(All) | O | |
ReduceAny(Any) | O | |
ReduceMax(Max) | O | O | O
ReduceMin(Min) | O | O | O
ReduceProd | O | |
ReduceSum(Sum) | O | O | O
ReLU | O | O | O
-ReLU6 | | O | O
+ReLU6 | O | O | O
Reshape | O | O | O
ResizeBilinear | O | O | O
-ReverseV2 | O | | O
+ResizeNearestNeighbor | | O | O
+ReverseV2 | O | O | O
RNN | | O | O
Round | O | |
Rsqrt | O | O | O
SpaceToBatchND | O | O | O
SpaceToDepth | O | O | O
Split | O | O | O
-SplitV | O | |
+SplitV | O | O |
Sqrt | O | O | O
Square | O | |
SquaredDifference | O | O | O
Squeeze | O | O | O
StridedSlice | O | O | O
Sub | O | O | O
-Svdf | | |
Tanh | O | O | O
Tile | O | |
TopKV2 | | | O
DepthToSpace | O | O | O
DepthwiseConv2D | O | O | O
Dequantize | O | O | O
+Div | O | |
EmbeddingLookup | | O | O
Equal | O | O | O
-ExpandDims | O | |
+Erf | O | |
+ExpandDims | O | O | O
FullyConnected | O | O | O
Gather | O | O | O
Greater | O | O | O
LessEqual | O | O | O
Logistic | O | O | O
LogSoftmax | O | |
-Maximum | | O | O
+Maximum | O | O | O
MaxPool2D | O | O | O
Mean | O | O | O
-Minimum | | O | O
+Minimum | O | O | O
Mul | O | O |
NotEqual | O | O | O
-OneHot | | O |
Pack | | O | O
Pad | O | O | O
PadV2 | O | O | O
PReLU | | O | O
+Quantize | O | |
Rank | O | |
ReduceMax(Max) | | O |
ReduceMin(Min) | | O |
ReLU | | O | O
ReLU6 | | O | O
Reshape | O | O | O
-ResizeBilinear | O | | O
+ResizeBilinear | O | O | O
+ResizeNearestNeighbor | | O | O
Shape | O | |
Slice | O | O | O
Softmax | O | O | O
SpaceToBatchND | O | O | O
SpaceToDepth | O | O | O
Split | O | O | O
-SplitV | O | |
+SplitV | O | O |
Squeeze | O | O | O
+StatelessRandomUniform | O | |
StridedSlice | | O | O
Sub | O | O | O
Tanh | O | O | O
Operation | CPU | ACL-CL | ACL-NEON
-- | -- | -- | --
+Add | O | O | O
ArgMax | O | O | O
ArgMin | O | O | O
-Concat | O | |
+AvgPool2D | O | |
+Concat | O | O | O
+Conv2D | O | |
DepthToSpace | O | |
-Dequantize | O | |
+DepthwiseConv2D | O | |
+Dequantize | O | O | O
+ExpandDims | O | O | O
+MaxPool2D | O | |
+Mul | O | O | O
+Pad | O | O | O
+PadV2 | O | |
+PReLU | | O | O
+Quantize | O | |
Rank | O | |
+Reshape | O | O | O
+ResizeBilinear | O | O | O
+ResizeNearestNeighbor | | O | O
Shape | O | |
+Softmax | O | O | O
+Squeeze | O | O | O
+Sub | O | O | O
set(HOST_ARCH_BASE "arm")
elseif("${HOST_ARCH}" STREQUAL "aarch64")
set(HOST_ARCH_BASE "aarch64")
+elseif("${HOST_ARCH}" STREQUAL "i686")
+ set(HOST_ARCH_BASE "i686")
else()
message(FATAL_ERROR "'${HOST_ARCH}' architecture is not supported")
endif()
set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "aarch64")
set(TARGET_ARCH_BASE "aarch64")
+elseif("${TARGET_ARCH}" STREQUAL "i686")
+ set(TARGET_ARCH_BASE "i686")
else()
message(FATAL_ERROR "'${TARGET_ARCH}' architecture is not supported")
endif()
nnas_include(OptionTools)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v20.05.tar.gz)
+ set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v21.02.tar.gz)
ExternalSource_Download(ARMCOMPUTE ${ARMCOMPUTE_URL})
set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- # NOTE TensorFlow 1.12 downloads abseil from the following URL
- # - https://github.com/abseil/abseil-cpp/archive/48cd2c3f351ff188bc85684b84a91b6e6d17d896.tar.gz
- #
- # The last change of "48cd2c3f351" was commited on 2018.09.27
- #
- # Let's use the latest released version (2020-02 release patch 2)
+ # NOTE TensorFlow 2.3 downloads abseil from the following URL
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/20200225.2.tar.gz)
+ envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz)
ExternalSource_Download(ABSEIL
DIRNAME ABSEIL
URL ${ABSEIL_URL}
- CHECKSUM MD5=73f2b6e72f1599a9139170c29482ddc4)
+ CHECKSUM MD5=4d9aa7e757adf48fef171c85f0d88552)
set(AbseilSource_DIR ${ABSEIL_SOURCE_DIR} PARENT_SCOPE)
set(AbseilSource_FOUND TRUE PARENT_SCOPE)
--- /dev/null
+function(_OouraFFTSource_import)
+ if(NOT DOWNLOAD_OOURAFFT)
+ set(OouraFFTSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_OOURAFFT)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # NOTE TensorFlow 2.3 downloads OOURAFFT from the following URL
+ envoption(OOURAFFT_URL https://github.com/petewarden/OouraFFT/archive/v1.0.tar.gz)
+
+ ExternalSource_Download(OOURAFFT ${OOURAFFT_URL})
+
+ set(OouraFFTSource_DIR ${OOURAFFT_SOURCE_DIR} PARENT_SCOPE)
+ set(OouraFFTSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_OouraFFTSource_import)
+
+_OouraFFTSource_import()
DOCKER_BUILD_ARGS=()
# Default setting
-UBUNTU_CODENAME="xenial"
+UBUNTU_CODENAME="bionic"
DOCKER_TAG="latest"
while [[ $# -gt 0 ]]
return
fi
- CLANG_FORMAT_CANDIDATES+=("clang-format-3.9")
+ CLANG_FORMAT_CANDIDATES+=("clang-format-8")
for CLANG_FORMAT_CANDIDATE in ${CLANG_FORMAT_CANDIDATES[@]}; do
if command_exists ${CLANG_FORMAT_CANDIDATE} ; then
CLANG_FORMAT="${CLANG_FORMAT_CANDIDATE}"
done
if [[ -z ${CLANG_FORMAT} ]]; then
- echo "[ERROR] clang-format-3.9 is unavailable"
- echo
- echo " Please install clang-format-3.9 before running format check"
- exit 1
- fi
-
- # Migration to clang-format-8
- # TODO Remove this after migration to clang-format-8
- CLANG_FORMAT_8="clang-format-8"
- if ! command_exists $CLANG_FORMAT_8_CANDIDATE; then
echo "[ERROR] clang-format-8 is unavailable"
echo
echo " Please install clang-format-8 before running format check"
- echo " (or use latest docker image if you are using docker for format check)"
exit 1
fi
- for DIR_CLANG_FORMAT_8 in $(git ls-files -co --exclude-standard '*/.clang-format'); do
- DIRECTORIES_USE_CLANG_FORMAT_8+=($(dirname "${DIR_CLANG_FORMAT_8}"))
- done
# Check c++ files
FILES_TO_CHECK_CPP=()
- FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8=()
for f in ${FILES_TO_CHECK[@]}; do
# Manually ignore style checking
if [[ ${f} == +(*/NeuralNetworks.h|*/NeuralNetworksExtensions.h) ]]; then
# File extension to check
if [[ ${f} == +(*.h|*.hpp|*.cpp|*.cc|*.c|*.cl) ]]; then
-
- # Check clang-format-8 target files first
- # TODO Remove this after migration to clang-format-8
- FOUND_CLANG_8=0
- for USE_CLANG_FORMAT_8 in ${DIRECTORIES_USE_CLANG_FORMAT_8[@]}; do
- if [[ $f = $USE_CLANG_FORMAT_8* ]]; then
- FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8+=("$f")
- FOUND_CLANG_8=1
- break
- fi
- done
-
- if [[ $FOUND_CLANG_8 -ne 1 ]]; then
- FILES_TO_CHECK_CPP+=("${f}")
- fi
+ FILES_TO_CHECK_CPP+=("${f}")
fi
done
INVALID_EXIT=${EXIT_CODE}
fi
fi
-
- # Check by clang-format-8
- # TODO Remove this after migration to clang-format-8
- if [[ ${#FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8} -ne 0 ]]; then
- ${CLANG_FORMAT_8} -i ${FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8[@]}
- EXIT_CODE=$?
- if [[ ${EXIT_CODE} -ne 0 ]]; then
- INVALID_EXIT=${EXIT_CODE}
- fi
- fi
}
function check_python_files() {
"${CANDIDATES[@]}"
# Exclude *.test.cpp files from coverage report
-"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
- '*.test.cpp'
-
# Exclude flatbuffer generated files from coverage report
"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
- '*_schema_generated.h'
+ '*.test.cpp' '*_schema_generated.h'
# Final coverage data
cp -v ${EXCLUDED_COVERAGE_INFO_PATH} ${COVERAGE_INFO_PATH}
+++ /dev/null
-FROM ubuntu:16.04
-
-ARG UBUNTU_MIRROR
-
-RUN if [ -n "$http_proxy" ] ; then echo "Acquire::http::proxy \"${http_proxy}\";" >> /etc/apt/apt.conf ; fi
-RUN if [ -n "$https_proxy" ] ; then echo "Acquire::https::proxy \"${https_proxy}\";" >> /etc/apt/apt.conf ; fi
-RUN if [ -n "$UBUNTU_MIRROR" ] ; then sed "s/archive.ubuntu.com/${UBUNTU_MIRROR}/g" -i /etc/apt/sources.list ; fi
-
-# Install 'add-apt-repository'
-RUN apt-get update && apt-get -qqy install software-properties-common
-
-# Build tool
-RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov
-
-# Install extra dependencies (Caffe, nnkit)
-RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
-
-# Install protocol buffer
-RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
-
-# Additonal tools
-RUN apt-get update && \
- apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
-RUN pip3 install --upgrade pip
-RUN pip3 install yapf==0.22.0 numpy
-
-# Install google test (source)
-RUN apt-get update && apt-get -qqy install libgtest-dev
-
-###
-### NOTE: Don't add new package install using apt-get or pip below this line
-###
-
-# Install native build tool gcc version 6.x
-RUN add-apt-repository ppa:ubuntu-toolchain-r/test && apt-get update && apt-get -qqy install gcc-6 g++-6
-RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 60 --slave /usr/bin/g++ g++ /usr/bin/g++-6 && update-alternatives --config gcc
-
-# Install cross build tool gcc version 6.x
-RUN wget https://releases.linaro.org/components/toolchain/binaries/6.3-2017.02/arm-linux-gnueabihf/gcc-linaro-6.3.1-2017.02-x86_64_arm-linux-gnueabihf.tar.xz -O gcc-hardfp.tar.xz -nv
-RUN wget https://releases.linaro.org/components/toolchain/binaries/6.2-2016.11/arm-linux-gnueabi/gcc-linaro-6.2.1-2016.11-x86_64_arm-linux-gnueabi.tar.xz -O gcc-softfp.tar.xz -nv
-RUN wget https://releases.linaro.org/components/toolchain/binaries/6.2-2016.11/aarch64-linux-gnu/gcc-linaro-6.2.1-2016.11-x86_64_aarch64-linux-gnu.tar.xz -O gcc-aarch64.tar.xz -nv
-RUN tar -xf gcc-hardfp.tar.xz -C /opt/ && rm -rf gcc-hardfp.tar.xz
-RUN tar -xf gcc-softfp.tar.xz -C /opt/ && rm -rf gcc-softfp.tar.xz
-RUN tar -xf gcc-aarch64.tar.xz -C /opt/ && rm -rf gcc-aarch64.tar.xz
-ENV PATH "/opt/gcc-linaro-6.2.1-2016.11-x86_64_arm-linux-gnueabi/bin:/opt/gcc-linaro-6.3.1-2017.02-x86_64_arm-linux-gnueabihf/bin:/opt/gcc-linaro-6.2.1-2016.11-x86_64_aarch64-linux-gnu/bin:$PATH"
-
-###
-### NOTE: Don't add build & install process using installed buildtool above this line
-###
-
-# Build and install google test static libraries
-WORKDIR /root/gtest
-RUN cmake /usr/src/gtest
-RUN make
-RUN mv *.a /usr/lib
-WORKDIR /root
-RUN rm -rf gtest
-
-# Install gbs & sdb
-RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubuntu_16.04/ /' | cat >> /etc/apt/sources.list
-RUN apt-get update && apt-get -qqy install gbs
-RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_3.1.4_ubuntu-64.zip -O sdb.zip
-RUN unzip -d tmp sdb.zip && rm sdb.zip
-RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp
-
-# Clean archives (to reduce image size)
-RUN apt-get clean -y
exit 255
fi
-BUILD_ITEMS="angkor cwrap pepper-str pepper-strcast pp stdex \
+BUILD_ITEMS="angkor cwrap pepper-str pepper-strcast pp \
oops pepper-assert \
hermes hermes-std \
loco locop locomotiv logo-core logo \
-foder souschef arser vconone \
+foder souschef arser vconone crew \
safemain mio-circle mio-tflite \
tflite2circle \
luci \
luci-interpreter \
+luci-eval-driver \
+luci-pass-value-test \
luci-value-test \
record-minmax \
circle2circle circle-quantizer"
option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
-option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" ON)
+option(BUILD_TFLITE_COMPARATOR_TEST_TOOL "Build tflite loader testing tool" ON)
option(BUILD_WITH_HDF5 "Build test tool with HDF5 library" ON)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
option(DOWNLOAD_BOOST "Download boost source" OFF)
option(DOWNLOAD_RUY "Download ruy source" ON)
option(DOWNLOAD_CPUINFO "Download cpuinfo source" ON)
+option(DOWNLOAD_OOURAFFT "Download Ooura FFT source" ON)
option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" ON)
option(BUILD_BOOST "Build boost source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
--- /dev/null
+#
+# i686 tizen compile options
+#
+
+message(STATUS "Building for i686 Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for i686-tizen
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ )
--- /dev/null
+#
+# i686 tizen cmake options
+#
+option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+
+option(BUILD_LOGGING "Build logging runtime" OFF)
+option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(BUILD_XNNPACK "Build XNNPACK" OFF)
# Let's build and install ARMCompute libraries
function(_ARMCompute_Build ARMComputeInstall_DIR)
set(PKG_NAME "ARMCOMPUTE")
- set(PKG_IDENTIFIER "20.05")
+ set(PKG_IDENTIFIER "21.02")
set(INSTALL_STAMP_PATH "${ARMComputeInstall_DIR}/${PKG_NAME}.stamp")
set(ARMComputeBuild_DIR "${CMAKE_BINARY_DIR}/externals/armcompute")
endmacro(return_unless)
# Required packages
- nnas_find_package(AbseilSource QUIET)
- return_unless(AbseilSource_FOUND)
+ nnas_find_package(Abseil QUIET)
+ return_unless(Abseil_FOUND)
nnfw_find_package(TensorFlowEigen EXACT 1.13.1 QUIET)
return_unless(TensorFlowEigen_1_13_1_FOUND)
nnas_find_package(FarmhashSource QUIET)
+++ /dev/null
-# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
-#
-# Tensorflow Lite library 2.3.0
-#
-set(TENSORFLOW_LITE_BASE ${TFLiteVanillaTensorFlowSource_DIR}/tensorflow/lite)
-
-file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
- "${TENSORFLOW_LITE_BASE}/*.cc"
- "${TENSORFLOW_LITE_BASE}/core/*.cc")
-
-file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
-
-file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
-
-file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
- "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
-
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
-
-file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
-
-file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
-
-list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
-
-# externals
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFarmhashSource_DIR}/src/farmhash.cc")
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFFT2DSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFFT2DSource_DIR}/fftsg2d.c")
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFlatBuffersSource_DIR}/src/util.cpp")
-
-# externals - absl
-file(GLOB_RECURSE ABSL_SRCS "${TFLiteVanillaAbslSource_DIR}/absl/*.cc")
-file(GLOB_RECURSE ABSL_EXCLS "${TFLiteVanillaAbslSource_DIR}/absl/*test*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/*benchmark*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/synchronization/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/debugging/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/hash/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/flags/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/random/*.cc")
-list(REMOVE_ITEM ABSL_SRCS ${ABSL_EXCLS})
-list(APPEND TFLITE_SRCS ${ABSL_SRCS})
-
-# externals - ruy
-file(GLOB RUY_SRCS "${TFLiteVanillaRuySource_DIR}/ruy/*.cc")
-file(GLOB_RECURSE RUY_EXCLS "${TFLiteVanillaRuySource_DIR}/ruy/*test*.cc"
- "${TFLiteVanillaRuySource_DIR}/ruy/*benchmark*.cc"
- "${TFLiteVanillaRuySource_DIR}/ruy/*example*.cc")
-list(REMOVE_ITEM RUY_SRCS ${RUY_EXCLS})
-# Temporary fix for ruy compilation error.
-# TODO(b/158800055): Remove this hack once the ruy version is correctly bumped.
-list(REMOVE_ITEM RUY_SRCS "${TFLiteVanillaRuySource_DIR}/ruy/prepare_packed_matrices.cc")
-list(APPEND TFLITE_SRCS ${RUY_SRCS})
-
-
-# Build with mmap? true
-# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
-set(BUILD_WITH_MMAP TRUE)
-if(${BUILD_WITH_MMAP})
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
-else()
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
-endif()
-
-# Build with nnapi? true
-# caution: this nnapi delegate comes from tflite, not ours.
-set(BUILD_WITH_NNAPI TRUE)
-if(${BUILD_WITH_NNAPI})
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
-else()
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
-endif()
-
-# ios: we don't support ios
-list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
-
-# android
-if(NOT ANDROID)
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
-endif()
-
-# exclude some source files
-file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
- "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
- "${TENSORFLOW_LITE_BASE}/*example*.cc"
- "${TENSORFLOW_LITE_BASE}/*tool*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
-
-# include headers
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaTensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaEigenSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaAbslSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaGEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaNEON2SSESource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFarmhashSource_DIR}/src")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFlatBuffersSource_DIR}/include")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFP16Source_DIR}/include")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaRuySource_DIR}")
-
-add_library(tensorflow-lite-2.3.0 STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite-2.3.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_include_directories(tensorflow-lite-2.3.0 PRIVATE ${CpuInfoSource_DIR})
-target_compile_definitions(tensorflow-lite-2.3.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO")
-set_property(TARGET tensorflow-lite-2.3.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(tensorflow-lite-2.3.0 eigen ${LIB_PTHREAD} dl cpuinfo)
-if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
- target_link_libraries(tensorflow-lite-2.3.0 rt)
-endif()
-
-if(ANDROID)
- target_link_libraries(tensorflow-lite-2.3.0 log)
- target_include_directories(tensorflow-lite-2.3.0 PUBLIC "${NDK_DIR}/..")
-endif()
--- /dev/null
+# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
+#
+# Tensorflow Lite library 2.3.0
+#
+set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
+
+file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
+ "${TENSORFLOW_LITE_BASE}/*.cc"
+ "${TENSORFLOW_LITE_BASE}/core/*.cc")
+
+file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
+
+file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
+
+file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
+ "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
+
+list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
+list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
+
+file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
+
+file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
+
+list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
+
+# externals
+list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c")
+list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c")
+
+# Build with mmap? true
+# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
+set(BUILD_WITH_MMAP TRUE)
+if(${BUILD_WITH_MMAP})
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
+else()
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
+endif()
+
+# Build with nnapi? true
+# caution: this nnapi delegate comes from tflite, not ours.
+set(BUILD_WITH_NNAPI TRUE)
+if(${BUILD_WITH_NNAPI})
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
+else()
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
+endif()
+
+# ios: we don't support ios
+list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
+
+# android
+if(NOT ANDROID)
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
+endif()
+
+# exclude some source files
+file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
+ "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
+ "${TENSORFLOW_LITE_BASE}/*example*.cc"
+ "${TENSORFLOW_LITE_BASE}/*tool*.cc")
+list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
+
+# include headers
+list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TensorFlowGEMMLowpSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${Fp16Source_DIR}/include")
+
+if(NEON2SSESource_FOUND)
+ list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
+endif(NEON2SSESource_FOUND)
+
+add_library(tensorflow-lite-2.3.0 STATIC ${TFLITE_SRCS})
+target_include_directories(tensorflow-lite-2.3.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
+target_include_directories(tensorflow-lite-2.3.0 PRIVATE ${CpuInfoSource_DIR})
+target_compile_definitions(tensorflow-lite-2.3.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO")
+set_property(TARGET tensorflow-lite-2.3.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(tensorflow-lite-2.3.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl)
+if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
+ target_link_libraries(tensorflow-lite-2.3.0 rt)
+endif()
+
+if(ANDROID)
+ target_link_libraries(tensorflow-lite-2.3.0 log)
+ target_include_directories(tensorflow-lite-2.3.0 PUBLIC "${NDK_DIR}/..")
+endif()
--- /dev/null
+if(BUILD_TENSORFLOW_LITE_2_3_0)
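+  # Helper: if the given variable is not set, report it, mark TensorFlowLite 2.3.0
+  # as not found, and stop processing this package config.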
+ macro(return_unless VAR)
+ if(NOT ${VAR})
+ message("TFLiteVanillaRun: ${VAR} NOT TRUE")
+ set(TensorFlowLite_2_3_0_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${VAR})
+ endmacro(return_unless)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
+ return_unless(TensorFlowSource_FOUND)
+
+ # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/tensorflow/workspace.bzl
+ nnas_find_package(AbseilSource QUIET)
+ return_unless(AbseilSource_FOUND)
+ nnfw_find_package(Eigen QUIET)
+ return_unless(Eigen_FOUND)
+ nnas_find_package(Farmhash QUIET)
+ return_unless(Farmhash_FOUND)
+ nnfw_find_package(FlatBuffers QUIET)
+ return_unless(FlatBuffers_FOUND)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
+ return_unless(TensorFlowGEMMLowpSource_FOUND)
+ nnas_find_package(OouraFFTSource QUIET)
+ return_unless(OouraFFTSource_FOUND)
+ nnfw_find_package(Ruy QUIET)
+ return_unless(Ruy_FOUND)
+
+ # TensorFlow Lite requires FP16 library's header only
+ nnas_find_package(Fp16Source QUIET)
+ return_unless(Fp16Source_FOUND)
+
+ # Optional packages
+ nnas_find_package(NEON2SSESource QUIET)
+
+ nnas_include(ExternalProjectTools)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.3.0)
+
+ set(TensorFlowLite_2_3_0_FOUND TRUE)
+ return()
+endif()
--- /dev/null
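+# Version file for the TensorFlowLite 2.3.0 package: only an exact 2.3.0 request is accepted.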
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+++ /dev/null
-if(BUILD_TENSORFLOW_LITE_2_3_0)
- macro(return_unless VAR)
- if(NOT ${VAR})
- message("${VAR} NOT TRUE")
- set(TensorFlowLite_2_3_0_FOUND PARENT_SCOPE)
- return()
- endif(NOT ${VAR})
- endmacro(return_unless)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
-
- set(absl_url "https://github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz")
- ExternalSource_Download("TFLiteVanilla_Absl" ${absl_url})
- set(TFLiteVanillaAbslSource_DIR "${TFLiteVanilla_Absl_SOURCE_DIR}")
- if (NOT TFLiteVanillaAbslSource_DIR STREQUAL "")
- set(TFLiteVanillaAbslSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaAbslSource_FOUND)
-
- set(eigen_url "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
- ExternalSource_Download("TFLiteVanilla_Eigen" ${eigen_url})
- set(TFLiteVanillaEigenSource_DIR "${TFLiteVanilla_Eigen_SOURCE_DIR}")
- if (NOT TFLiteVanillaEigenSource_DIR STREQUAL "")
- set(TFLiteVanillaEigenSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaEigenSource_FOUND)
-
- set(farmhash_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz")
- ExternalSource_Download("TFLiteVanilla_Farmhash" ${farmhash_url})
- set(TFLiteVanillaFarmhashSource_DIR "${TFLiteVanilla_Farmhash_SOURCE_DIR}")
- if (NOT TFLiteVanillaFarmhashSource_DIR STREQUAL "")
- set(TFLiteVanillaFarmhashSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFarmhashSource_FOUND)
-
- set(fft2d_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/petewarden/OouraFFT/archive/v1.0.tar.gz")
- ExternalSource_Download("TFLiteVanilla_FFT2D" ${fft2d_url})
- set(TFLiteVanillaFFT2DSource_DIR "${TFLiteVanilla_FFT2D_SOURCE_DIR}")
- if (NOT TFLiteVanillaFFT2DSource_DIR STREQUAL "")
- set(TFLiteVanillaFFT2DSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFFT2DSource_FOUND)
-
- set(flatbuffers_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.tar.gz")
- ExternalSource_Download("TFLiteVanilla_FlatBuffers" ${flatbuffers_url})
- set(TFLiteVanillaFlatBuffersSource_DIR "${TFLiteVanilla_FlatBuffers_SOURCE_DIR}")
- if (NOT TFLiteVanillaFlatBuffersSource_DIR STREQUAL "")
- set(TFLiteVanillaFlatBuffersSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFlatBuffersSource_FOUND)
-
- set(fp16_url "https://github.com/Maratyszcza/FP16/archive/4dfe081cf6bcd15db339cf2680b9281b8451eeb3.zip")
- ExternalSource_Download("TFLiteVanilla_FP16" ${fp16_url})
- set(TFLiteVanillaFP16Source_DIR "${TFLiteVanilla_FP16_SOURCE_DIR}")
- if (NOT TFLiteVanillaFP16Source_DIR STREQUAL "")
- set(TFLiteVanillaFP16Source_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFP16Source_FOUND)
-
- set(gemmlowp_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip")
- ExternalSource_Download("TFLiteVanilla_GEMMLowp" ${gemmlowp_url})
- set(TFLiteVanillaGEMMLowpSource_DIR "${TFLiteVanilla_GEMMLowp_SOURCE_DIR}")
- if (NOT TFLiteVanillaGEMMLowpSource_DIR STREQUAL "")
- set(TFLiteVanillaGEMMLowpSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaGEMMLowpSource_FOUND)
-
- set(neon2sse_url "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz")
- ExternalSource_Download("TFLiteVanilla_NEON2SSE" ${neon2sse_url})
- set(TFLiteVanillaNEON2SSESource_DIR "${TFLiteVanilla_NEON2SSE_SOURCE_DIR}")
- if (NOT TFLiteVanillaNEON2SSESource_DIR STREQUAL "")
- set(TFLiteVanillaNEON2SSESource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaNEON2SSESource_FOUND)
-
- set(tensorflow_url "https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz")
- ExternalSource_Download("TFLiteVanilla_TensorFlow" ${tensorflow_url})
- set(TFLiteVanillaTensorFlowSource_DIR "${TFLiteVanilla_TensorFlow_SOURCE_DIR}")
- if (NOT TFLiteVanillaTensorFlowSource_DIR STREQUAL "")
- set(TFLiteVanillaTensorFlowSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaTensorFlowSource_FOUND)
-
- set(ruy_url "https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip")
- ExternalSource_Download("TFLiteVanilla_Ruy" ${ruy_url})
- set(TFLiteVanillaRuySource_DIR "${TFLiteVanilla_Ruy_SOURCE_DIR}")
- if (NOT TFLiteVanillaRuySource_DIR STREQUAL "")
- set(TFLiteVanillaRuySource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaRuySource_FOUND)
-
- nnfw_find_package(CpuInfo QUIET)
- if (NOT CpuInfo_FOUND)
- message(STATUS "TFLiteVanillaRun: CPUINFO not found")
- set(TensorFlowLite_2_3_0_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT CpuInfo_FOUND)
-
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite-2.3.0" tflite-2.3.0)
-
- set(TensorFlowLite_2_3_0_FOUND TRUE)
- return()
-endif()
set(Xnnpack_FOUND TRUE PARENT_SCOPE)
endfunction(_Xnnpack_Build)
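+# Strip -flto from the inherited C/C++ flags so the XNNPACK sources are built
+# without link-time optimization (assumed to avoid LTO-related build issues).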
+string(REGEX REPLACE "-flto" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+string(REGEX REPLACE "-flto" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+
if(BUILD_XNNPACK)
_Xnnpack_Build()
else(BUILD_XNNPACK)
fi
# The default preset
-PRESET="20200630"
+PRESET="20210406"
EXTRA_OPTIONS=()
while [ "$#" -ne 0 ]; do
{
REQUIRED_UNITS=()
# Common Libraries
- REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
REQUIRED_UNITS+=("oops" "pepper-assert" "foder")
REQUIRED_UNITS+=("souschef")
REQUIRED_UNITS+=("safemain")
{
REQUIRED_UNITS=()
# Common Libraries
- REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
REQUIRED_UNITS+=("oops" "pepper-assert" "foder")
REQUIRED_UNITS+=("souschef")
REQUIRED_UNITS+=("safemain")
--- /dev/null
+#!/bin/bash
+
+# NOTE The purpose of this file is static analysis only.
+#      A new official preset will be added when the new programs are ready.
+
+PRESET="20210406"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
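+  # NOTE EXTRA_OPTIONS and the "join_by" helper are expected to be provided by the
+  # packaging script that sources this preset.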
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
--- /dev/null
+#!/bin/bash
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20210406" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+  # NOTE 'tf2tfliteV2' requires TensorFlow, but TensorFlow cannot be installed
+  # under MinGW. Install TensorFlow into a Python virtual environment from a
+  # native Windows CMD (run as administrator), then copy that environment to
+  # "${NNAS_INSTALL_PREFIX}/bin/venv".
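+  # For example (hypothetical paths, assuming Python 3 is available in CMD):
+  #   python -m venv C:\tf2nnpkg-venv
+  #   C:\tf2nnpkg-venv\Scripts\activate
+  #   pip install tensorflow
+  #   ... then copy C:\tf2nnpkg-venv to "${NNAS_INSTALL_PREFIX}/bin/venv"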
+}
"${ROOT}/bin/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" "${TMPDIR}/${MODEL_NAME}.tmp.circle"
# optimize
-"${ROOT}/bin/circle2circle" --all "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
--- /dev/null
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
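+# Each line of the info file is assumed to look like: "input, <name>:0, [<d0>, <d1>, ...]"
+# (and similarly for "output, ..."); only the tensor name and the shape are used here.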
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+# Generate BCQ information metadata
+# If model has no BCQ information or invalid information, pb file is not changed.
+"${ROOT}/bin/generate_bcq_metadata" \
+--input_path "${GRAPHDEF_FILE}" \
+--output_path "${TMPDIR}/${MODEL_NAME}_withmeta.pb" \
+--output_arrays "${OUTPUT}"
+
+# Generate BCQ information nodes as output_arrays
+# If model has no BCQ information, output_arrays would be empty.
+"${ROOT}/bin/generate_bcq_output_arrays" \
+--input_path "${TMPDIR}/${MODEL_NAME}_withmeta.pb" \
+--metadata_path "${TMPDIR}/${MODEL_NAME}_metadata_arrays.txt" \
+--output_arrays_path "${TMPDIR}/${MODEL_NAME}_output_arrays.txt"
+
+# generate tflite file
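+# NOTE The converter's --output_arrays is the concatenation of the generated BCQ
+# metadata arrays (if any), the user-specified outputs, and the generated BCQ
+# output arrays (if any).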
+TF2TFLITE_CONVERT_SCRIPT="python ${ROOT}/bin/tf2tfliteV2.py ${TF_INTERFACE} "
+TF2TFLITE_CONVERT_SCRIPT+="--input_path ${TMPDIR}/${MODEL_NAME}_withmeta.pb "
+TF2TFLITE_CONVERT_SCRIPT+="--input_arrays ${INPUT} "
+TF2TFLITE_CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
+TF2TFLITE_CONVERT_SCRIPT+="--output_arrays "
+TF2TFLITE_CONVERT_SCRIPT+="$(cat ${TMPDIR}/${MODEL_NAME}_metadata_arrays.txt)"
+TF2TFLITE_CONVERT_SCRIPT+="${OUTPUT}"
+TF2TFLITE_CONVERT_SCRIPT+="$(cat ${TMPDIR}/${MODEL_NAME}_output_arrays.txt) "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ TF2TFLITE_CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
+fi
+
+${TF2TFLITE_CONVERT_SCRIPT}
+
+# convert .tflite to .circle
+"${ROOT}/bin/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" "${TMPDIR}/${MODEL_NAME}.tmp.circle"
+
+# optimize
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
# prepare pre-built armcompute library
# android build requires pre-built armcompute library
-if [ ! -n "$EXT_ACL_FOLDER" ]; then
- echo "Please set EXT_ACL_FOLDER to use pre-built armcompute library"
- exit 1
-fi
+# if [ ! -n "$EXT_ACL_FOLDER" ]; then
+# echo "Please set EXT_ACL_FOLDER to use pre-built armcompute library"
+# exit 1
+# fi
+
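+# Build without a pre-built armcompute library: clear EXT_ACL_FOLDER so a value
+# from the environment is not picked up.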
+unset EXT_ACL_FOLDER
# prepare ndk
if [ ! -n "$NDK_DIR" ]; then
export BACKENDS=$1
if [[ "$2" == "" ]]; then
- $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
--reportdir=$ROOT_PATH/$3
else
- $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
--list=$2 \
--reportdir=$ROOT_PATH/$3
fi
# $2: (required) test list file relative path from nnfw root directory
# pass empty string if there is no skiplist
# $3: (required) relative path to report from nnfw root directory
-function TFLiteLoaderTest()
+function NNAPIFrontendTest()
{
- [[ $# -ne 3 ]] && echo "TFLiteLoaderTest: Invalid function argument setting" && exit 1
+ [[ $# -ne 3 ]] && echo "NNAPIFrontendTest: Invalid function argument setting" && exit 1
pushd ${ROOT_PATH} > /dev/null
export BACKENDS=$1
if [[ "$2" == "" ]]; then
- $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
--reportdir=$ROOT_PATH/$3
else
- $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
--list=$2 \
--reportdir=$ROOT_PATH/$3
fi
# Don't run this script
[[ "${BASH_SOURCE[0]}" == "${0}" ]] && echo "Please don't execute ${BASH_SOURCE[0]}, source it" && return
-DEBUG_BUILD_ITEMS="angkor;cwrap;pepper-str;pepper-strcast;pp;stdex"
+DEBUG_BUILD_ITEMS="angkor;cwrap;pepper-str;pepper-strcast;pp"
DEBUG_BUILD_ITEMS+=";oops;pepper-assert"
DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
-DEBUG_BUILD_ITEMS+=";foder;souschef;arser;vconone"
+DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite"
DEBUG_BUILD_ITEMS+=";tflite2circle"
DEBUG_BUILD_ITEMS+=";luci"
DEBUG_BUILD_ITEMS+=";luci-interpreter"
-DEBUG_BUILD_ITEMS+=";luci-value-test"
+DEBUG_BUILD_ITEMS+=";luci-eval-driver;luci-pass-value-test;luci-value-test"
DEBUG_BUILD_ITEMS+=";circle2circle;record-minmax;circle-quantizer"
+DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver"
DEBUG_BUILD_ITEMS+=";circle-verify"
+DEBUG_BUILD_ITEMS+=";circle-tensordump"
DEBUG_BUILD_ITEMS+=";tflchef;circlechef"
DEBUG_BUILD_ITEMS+=";common-artifacts"
DEBUG_BUILD_ITEMS+=";circle2circle-dredd-recipe-test"
DEBUG_BUILD_ITEMS+=";record-minmax-conversion-test"
DEBUG_BUILD_ITEMS+=";tf2tfliteV2;tf2tfliteV2-conversion-test"
DEBUG_BUILD_ITEMS+=";tflite2circle-conversion-test"
+DEBUG_BUILD_ITEMS+=";pota-quantization-value-test"
+DEBUG_BUILD_ITEMS+=";circle-part-value-test"
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
./nncc docker-run ./nnas create-package --prefix "${PWD}/${NNCC_INSTALL_PREFIX}" -- "${CONFIG_OPTIONS}"
mkdir -p ${ARCHIVE_PATH}
-tar -zcf ${ARCHIVE_PATH}/nncc-package.tar.gz -C ${NNCC_INSTALL_PREFIX} --exclude test ./
+tar -zcf ${ARCHIVE_PATH}/nncc-package.tar.gz -C ${NNCC_INSTALL_PREFIX} --exclude test --exclude tflchef* ./
tar -zcf ${ARCHIVE_PATH}/nncc-test-package.tar.gz -C ${NNCC_INSTALL_PREFIX} ./test
popd > /dev/null
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
REQUIRED_UNITS=()
# Common Libraries
-REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
-REQUIRED_UNITS+=("oops" "safemain" "foder" "arser" "vconone")
+REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+REQUIRED_UNITS+=("oops" "safemain" "foder" "crew" "arser" "vconone")
# Hermes Logging Framework
REQUIRED_UNITS+=("hermes" "hermes-std")
# loco IR and related utilities
ROOT_PATH="$CURRENT_PATH/../../"
# docker image name
-# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
export GCOV_PREFIX_STRIP=`cat $ROOT_PATH/tests/scripts/build_path_depth.txt`
-./infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader
+TENSOR_LOGGING=trace_log.txt ./infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --nnapi-frontend
./infra/scripts/test_ubuntu_runtime.sh --backend acl_neon
./infra/scripts/test_ubuntu_runtime.sh --backend cpu
# Enable all logs (mixed backend)
-TENSOR_LOGGING=trace_log.txt ONERT_LOG_ENABLE=1 GRAPH_DOT_DUMP=1 ./infra/scripts/test_ubuntu_runtime_mixed.sh
+ONERT_LOG_ENABLE=1 GRAPH_DOT_DUMP=1 ./infra/scripts/test_ubuntu_runtime_mixed.sh
# Enable trace event (acl_cl default backend)
export TRACE_FILEPATH=trace.json
-TFLiteModelVerification "acl_cl" "Product/out/test/list/frameworktest_list.armv7l.acl_cl.txt" "report/acl_cl/trace"
+TFLiteModelVerification "acl_cl" "Product/out/test/list/tflite_comparator.armv7l.acl_cl.list" "report/acl_cl/trace"
unset TRACE_FILEPATH
# Interpreter
BACKEND="cpu"
TEST_OS="linux"
TEST_PLATFORM="$TEST_ARCH-$TEST_OS"
-TFLITE_LOADER="0"
+TFLITE_LOADER="1"
LINEAR_ONLY="0"
RUN_INTERP="0"
+NNAPI_FRONTEND="0"
function Usage()
{
echo ""
echo "Options:"
echo " --backend <BACKEND> Runtime backend to test (default: ${BACKEND})"
- echo " --tflite-loader Enable TFLite Loader test"
+ echo " --nnapi-frontend NNAPI Frontend test"
echo " --linear-only Use Linear executor only"
}
;;
--tflite-loader)
TFLITE_LOADER="1"
+ NNAPI_FRONTEND="1" # For CI test
+ echo "[INFO] \"--tflite-loader\" argument is deprecated"
+ shift
+ ;;
+ --nnapi-frontend)
+ NNAPI_FRONTEND="1"
shift
;;
--linear-only)
fi
UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.${TEST_PLATFORM}.${BACKEND}"
-FRAMEWORK_TESTLIST="Product/out/test/list/frameworktest_list.${TEST_ARCH}.${BACKEND}.txt"
+TFLITE_TESTLIST="Product/out/test/list/tflite_comparator.${TEST_ARCH}.${BACKEND}.list"
REPORT_BASE="report/${BACKEND}"
EXECUTORS=("Linear" "Dataflow" "Parallel")
fi
NNAPIGTest "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_PATH}"
- TFLiteModelVerification "${BACKEND}" "${FRAMEWORK_TESTLIST}" "${REPORT_PATH}"
+ TFLiteModelVerification "${BACKEND}" "${TFLITE_TESTLIST}" "${REPORT_PATH}"
if [ $EXECUTOR = "Interpreter" ]; then
unset DISABLE_COMPILE
fi
done
-# Current support acl_cl backend testlist only
# TODO Support more backends
-TFLITE_LOADER_TESTLIST="Product/out/test/list/tflite_loader_list.${TEST_ARCH}.txt"
-if [[ $TFLITE_LOADER = "1" ]]; then
- TFLiteLoaderTest "${BACKEND}" "${TFLITE_LOADER_TESTLIST}" "${REPORT_BASE}/loader/${EXECUTOR}"
+NNAPI_FRONTEND_TESTLIST="Product/out/test/list/nnapi_test.${TEST_ARCH}.list"
+if [[ $NNAPI_FRONTEND = "1" ]]; then
+ NNAPIFrontendTest "${BACKEND}" "${NNAPI_FRONTEND_TESTLIST}" "${REPORT_BASE}/nnapi/${EXECUTOR}"
fi
BACKENDS=(acl_cl acl_neon cpu)
# Get the intersect of framework test list files
-TESTLIST_PREFIX="Product/out/test/list/frameworktest_list.${TEST_ARCH}"
+TESTLIST_PREFIX="Product/out/test/list/tflite_comparator.${TEST_ARCH}"
SKIPLIST_PREFIX="Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
-sort $TESTLIST_PREFIX.${BACKENDS[0]}.txt > $TESTLIST_PREFIX.intersect.txt
+sort $TESTLIST_PREFIX.${BACKENDS[0]}.list > $TESTLIST_PREFIX.intersect.list
sort $SKIPLIST_PREFIX.${BACKENDS[0]} > $SKIPLIST_PREFIX.union
for BACKEND in "${BACKENDS[@]:1}"; do
- comm -12 <(sort $TESTLIST_PREFIX.intersect.txt) <(sort $TESTLIST_PREFIX.$BACKEND.txt) > $TESTLIST_PREFIX.intersect.next.txt
+ comm -12 <(sort $TESTLIST_PREFIX.intersect.list) <(sort $TESTLIST_PREFIX.$BACKEND.list) > $TESTLIST_PREFIX.intersect.next.list
comm <(sort $SKIPLIST_PREFIX.union) <(sort $SKIPLIST_PREFIX.$BACKEND) | tr -d "[:blank:]" > $SKIPLIST_PREFIX.union.next
- mv $TESTLIST_PREFIX.intersect.next.txt $TESTLIST_PREFIX.intersect.txt
+ mv $TESTLIST_PREFIX.intersect.next.list $TESTLIST_PREFIX.intersect.list
mv $SKIPLIST_PREFIX.union.next $SKIPLIST_PREFIX.union
done
popd > /dev/null
export ACL_LAYOUT="NCHW"
export RUY_THREADS=4
NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
-TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.txt" "report/mixed"
+TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.list" "report/mixed"
$SDB_CMD push cache.tar.gz $TEST_ROOT/.
rm -rf cache.tar.gz
$SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT/Product/out/test/models
-
- # download api test model file for nnfw_api_gtest
- MODEL_CACHE_DIR=$(mktemp -d)
- tests/scripts/models/run_test.sh --download=on --run=off \
- --configdir=tests/scripts/models/nnfw_api_gtest \
- --cachedir=$MODEL_CACHE_DIR
- tar -zcf $MODEL_CACHE_DIR/api_model_test.tar.gz -C $MODEL_CACHE_DIR .
- $SDB_CMD push $MODEL_CACHE_DIR/api_model_test.tar.gz $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
- $SDB_CMD shell tar -zxf $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/api_model_test.tar.gz \
- -C $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
- rm -rf $MODEL_CACHE_DIR
popd
}
--- /dev/null
+# NNPackage example
+
+## Package version 1.1.0
+
+### one_op_in_tflite
+
+- Model file: TensorFlow Lite model
+- Only one `ADD` operation
+- Includes a `config.cfg` referenced from the MANIFEST `configs` field (new in package version 1.1.0)
+
+## Package version 1.0.0
+
+### add
+
+- Model file: TensorFlow Lite model
+- Only one `ADD` operation
+
+### add_invalid_manifest
+
+- Model file: TensorFlow Lite model
+- Only one `ADD` operation
+- Invalid manifest: invalid JSON format
+
+### if_dynamic
+
+- Model file: TensorFlow Lite model
+- `IF` operation example with sample input and output data
+
+### while_dynamic
+
+- Model file: TensorFlow Lite model
+- `WHILE` operation example with sample input and output data
+
+++ /dev/null
-{
- "major-version" : "1",
- "minor-version" : "1",
- "patch-version" : "0",
- "configs" : [ "config.cfg" ],
- "models" : [ "add.tflite" ],
- "model-types" : [ "tflite" ]
-}
+++ /dev/null
-BACKENDS="cpu"
--- /dev/null
+{
+ "major-version" : "1",
+ "minor-version" : "0",
+ "patch-version" : "0",
+ "models" : [ "add.tflite" ],
+ "model-types" : [ "tflite" ]
+}
--- /dev/null
+{
+ "major-version" : "1"
+ "minor-version" : "0"
+ "patch-version" : "0"
+ "models" : [ "add.tflite" ]
+ "model-types" : [ "tflite" ]
+}
--- /dev/null
+{
+ "major-version" : "1",
+ "minor-version" : "0",
+ "patch-version" : "0",
+ "models" : [ "if_dynamic.tflite" ],
+ "model-types" : [ "tflite" ]
+}
--- /dev/null
+{
+ "major-version" : "1",
+ "minor-version" : "0",
+ "patch-version" : "0",
+ "models" : [ "while_dynamic.tflite" ],
+ "model-types" : [ "tflite" ]
+}
--- /dev/null
+{
+ "major-version" : "1",
+ "minor-version" : "1",
+ "patch-version" : "0",
+ "configs" : [ "config.cfg" ],
+ "models" : [ "add.tflite" ],
+ "model-types" : [ "tflite" ]
+}
--- /dev/null
+BACKENDS="cpu"
Name: nnfw
Summary: nnfw
-Version: 1.12.0
+Version: 1.15.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause
%ifarch aarch64
%define target_arch aarch64
%endif
+%ifarch %ix86
+%define target_arch i686
+%endif
%define install_dir %{_prefix}
%define install_path %{buildroot}%{install_dir}
tar -xf %{SOURCE1011} -C ./externals
%build
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
# runtime build
%{build_env} ./nnfw configure %{build_options} %{extra_option}
%{build_env} ./nnfw build -j4
%endif # arm armv7l aarch64
%install
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
mkdir -p %{buildroot}%{_libdir}
mkdir -p %{buildroot}%{_bindir}
%files
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
%{_libdir}/*.so
%endif
%files devel
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
%dir %{_includedir}/nnfw
%{_includedir}/nnfw/*
%{_libdir}/pkgconfig/nnfw.pc
%files plugin-devel
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
%dir %{_includedir}/onert
%{_includedir}/onert/*
%{_libdir}/pkgconfig/nnfw-plugin.pc
%endif
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l aarch64 x86_64 %ix86
%files minimal-app
%manifest %{name}.manifest
%defattr(-,root,root,-)
--- /dev/null
+operand {
+ name: "bc_input"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "bc_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "bc_ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "BroadcastTo"
+ input: "bc_input"
+ input: "bc_shape"
+ output: "bc_ofm"
+}
+input: "bc_input"
+output: "bc_ofm"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 }
+}
+
+operand {
+ name: "ifm2"
+ type: INT32
+ shape { }
+ filler {
+ tag: "constant"
+ arg: "-1"
+ }
+}
+
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 1 }
+}
+
+operation {
+ type: "ExpandDims"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+
+operation {
+ type: "FakeQuant"
+ fakequant_options {
+ min: 0.0
+ max: 1.0
+ num_bits: 8
+ narrow_range: false
+ }
+ input: "ifm"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "bc_input"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "bc_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "bc_ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "BroadcastTo"
+ input: "bc_input"
+ input: "bc_shape"
+ output: "bc_ofm"
+}
+operand {
+ name: "reshape_data"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "reshape_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "reshape_ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 1
+ new_shape: 2
+ new_shape: 3
+ }
+ input: "reshape_data"
+ input: "reshape_shape"
+ output: "reshape_ofm"
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "AddV2"
+ input: "bc_ofm"
+ input: "reshape_ofm"
+ output: "ofm"
+}
+input: "bc_input"
+input: "reshape_data"
+output: "ofm"
--- /dev/null
+# To check if BroadcastTo and AddV2 are fused to Add op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ADD_EXIST" $(op_count ADD) '=' 1
+RULE "NO_BroadcastTo" $(op_count 'CUSTOM(BroadcastTo)') '=' 0
+RULE "NO_AddV2" $(op_count 'CUSTOM(AddV2)') '=' 0
--- /dev/null
+operand {
+ name: "bc_input"
+ type: INT64
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "bc_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "bc_ofm"
+ type: INT64
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "BroadcastTo"
+ input: "bc_input"
+ input: "bc_shape"
+ output: "bc_ofm"
+}
+operand {
+ name: "reshape_data"
+ type: INT64
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "reshape_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "reshape_ofm"
+ type: INT64
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 1
+ new_shape: 2
+ new_shape: 3
+ }
+ input: "reshape_data"
+ input: "reshape_shape"
+ output: "reshape_ofm"
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "AddV2"
+ input: "bc_ofm"
+ input: "reshape_ofm"
+ output: "ofm"
+}
+input: "bc_input"
+input: "reshape_data"
+output: "ofm"
--- /dev/null
+# To check if BroadcastTo and AddV2 are not fused to Add op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "BroadcastTo_EXIST" $(op_count 'CUSTOM(BroadcastTo)') '=' 1
+RULE "AddV2_EXIST" $(op_count 'CUSTOM(AddV2)') '=' 1
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
--- /dev/null
+# To check if Add and Mul are fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: RELU
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
--- /dev/null
+# To check if Add(with RELU) and Mul are fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "mul_const"
+ input: "ofm_conv"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "add_const"
+ input: "ofm_mul"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
--- /dev/null
+# To check if Add and Mul with reverse input sequence are fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: RELU
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
--- /dev/null
+# To check if Add and Mul are not fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "MUL_EXIST" $(op_count MUL) '=' 1
+RULE "ADD_EXIST" $(op_count ADD) '=' 1
--- /dev/null
+operand {
+ name: "Placeholder"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Const_4"
+ type: FLOAT32
+ shape { }
+ filler { tag: "explicit" arg: "6" }
+}
+operand {
+ name: "Const_5"
+ type: FLOAT32
+ shape { }
+ filler { tag: "explicit" arg: "0" }
+}
+operand {
+ name: "Conv2D_1"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_2"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_21"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_11"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Minimum"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Maximum"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Conv2D_22"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Minimum_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Maximum_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "Conv2D"
+ input: "Placeholder"
+ input: "Conv2D_1"
+ input: "Conv2D_2"
+ output: "Conv2D_11"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Minimum"
+ input: "Conv2D_11"
+ input: "Const_4"
+ output: "Minimum"
+}
+operation {
+ type: "Maximum"
+ input: "Minimum"
+ input: "Const_5"
+ output: "Maximum"
+}
+operation {
+ type: "Conv2D"
+ input: "Maximum"
+ input: "Conv2D_21"
+ input: "Conv2D_2"
+ output: "Conv2D_22"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Minimum"
+ input: "Conv2D_22"
+ input: "Const_4"
+ output: "Minimum_1"
+}
+operation {
+ type: "Maximum"
+ input: "Minimum_1"
+ input: "Const_5"
+ output: "Maximum_1"
+}
+input: "Placeholder"
+output: "Maximum_1"
--- /dev/null
+# To check if Minimum and Maximum are converted to Relu6 op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 2
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 2
+RULE "MIN_NOT_EXIST" $(op_count MINUMUM) '=' 0
+RULE "MAX_NOT_EXIST" $(op_count MAXIMUM) '=' 0
--- /dev/null
+operand {
+ name: "Placeholder"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Conv2D_1"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_2"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_21"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_11"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "ReLU6"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Conv2D_22"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "ReLU6_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "Conv2D"
+ input: "Placeholder"
+ input: "Conv2D_1"
+ input: "Conv2D_2"
+ output: "Conv2D_11"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "ReLU6"
+ input: "Conv2D_11"
+ output: "ReLU6"
+}
+operation {
+ type: "Conv2D"
+ input: "ReLU6"
+ input: "Conv2D_21"
+ input: "Conv2D_2"
+ output: "Conv2D_22"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "ReLU6"
+ input: "Conv2D_22"
+ output: "ReLU6_1"
+}
+input: "Placeholder"
+output: "ReLU6_1"
--- /dev/null
+# To check if ReLU6 is fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 2
+RULE "RELU6_NOT_EXIST" $(op_count RELU6) '=' 0
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "scale"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "shift"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "dwout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "mulout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation : NONE
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "dwout"
+}
+operation {
+ type: "Mul"
+ input: "dwout"
+ input: "scale"
+ output: "mulout"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "mulout"
+ input: "shift"
+ output: "ofm"
+ add_options {
+ activation: RELU6
+ }
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if BatchNorm op(mul + add) is fused to Depthwise Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "DWCONV_EXIST" $(op_count DEPTHWISE_CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "scale"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "shift"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "dwout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "mulout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation : NONE
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "dwout"
+}
+operation {
+ type: "Mul"
+ input: "dwout"
+ input: "scale"
+ output: "mulout"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "mulout"
+ input: "shift"
+ output: "ofm"
+ add_options {
+ activation: RELU6
+ }
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if BatchNorm op(mul + add) is fused to Depthwise Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "DWCONV_EXIST" $(op_count DEPTHWISE_CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
name: "sequential/instance_normalization/stack"
type: INT32
shape {
- dim: 5
+ dim: 4
}
filler {
tag: "explicit"
arg: "32"
arg: "32"
arg: "8"
- arg: "1"
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
filler {
tag: "explicit"
dim: 1
dim: 1
dim: 8
- dim: 1
}
filler {
tag: "explicit"
name: "sequential/instance_normalization/moments/variance/reduction_indices"
type: INT32
shape {
- dim: 3
+ dim: 2
}
filler {
tag: "explicit"
arg: "1"
arg: "2"
- arg: "4"
}
}
operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
dim: 8
}
}
-operation {
- type: "Reshape"
- input: "input_layer"
- input: "sequential/instance_normalization/stack"
- output: "sequential/instance_normalization/Reshape"
-}
operation {
type: "Mean"
- input: "sequential/instance_normalization/Reshape"
+ input: "input_layer"
input: "sequential/instance_normalization/moments/variance/reduction_indices"
output: "sequential/instance_normalization/moments/mean"
mean_options {
}
operation {
type: "SquaredDifference"
- input: "sequential/instance_normalization/Reshape"
+ input: "input_layer"
input: "sequential/instance_normalization/moments/mean"
output: "sequential/instance_normalization/moments/SquaredDifference"
}
}
operation {
type: "Mul"
- input: "sequential/instance_normalization/Reshape"
+ input: "input_layer"
input: "sequential/instance_normalization/batchnorm/mul"
output: "sequential/instance_normalization/batchnorm/mul_1"
mul_options {
activation: NONE
}
}
-operation {
- type: "Reshape"
- input: "sequential/instance_normalization/batchnorm/add_1"
- input: "sequential/instance_normalization/Shape"
- output: "Identity"
-}
input: "input_layer"
-output: "Identity"
+output: "sequential/instance_normalization/batchnorm/add_1"
RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
RULE "INSTANCE_NORM_EXIST" $(op_count INSTANCE_NORM) '=' 1
-RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 3
+RULE "RESHAPE_EXIST" $(op_count RESHAPE) '<=' 3
RULE "NO_ADD" $(op_count ADD) '=' 0
RULE "NO_MUL" $(op_count MUL) '=' 0
--- /dev/null
+operand {
+ name: "Const"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "6"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Const_1"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 4
+ }
+ quant {
+ min: 0
+ max: 255
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Maximum"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 4
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Minimum"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 4
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "Minimum"
+ input: "Hole"
+ input: "Const"
+ output: "Minimum"
+}
+operation {
+ type: "Maximum"
+ input: "Minimum"
+ input: "Const_1"
+ output: "Maximum"
+}
+input: "Hole"
+output: "Maximum"
--- /dev/null
+# To check if Maximum and Minimum are fused to Relu6.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 1
+RULE "NO_MAXIMUM" $(op_count MAXIMUM) '=' 0
+RULE "NO_MINIMUM" $(op_count MINIMUM) '=' 0
dim: 4
dim: 16
}
- filler {
- tag: "gaussian"
- arg: "0.0"
- arg: "0.1"
- }
}
operand {
name: "Weights1"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 dim: 6 }
+}
+operand {
+ name: "shape1"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "6" arg: "6" }
+}
+operand {
+ name: "reshape_out"
+ type: FLOAT32
+ shape { dim: 6 dim: 6 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 6 dim: 6 }
+}
+operation {
+ type: "Reshape"
+ input: "ifm"
+ input: "shape1"
+ output: "reshape_out"
+}
+operation {
+ type: "Neg"
+ input: "reshape_out"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 dim: 6 }
+}
+operand {
+ name: "shape1"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "6" arg: "6" }
+}
+operand {
+ name: "shape2"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "6" arg: "2" arg: "3" }
+}
+operand {
+ name: "reshape_out"
+ type: FLOAT32
+ shape { dim: 6 dim: 6 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 6 dim: 2 dim: 3 }
+}
+operation {
+ type: "Reshape"
+ input: "ifm"
+ input: "shape1"
+ output: "reshape_out"
+}
+operation {
+ type: "Reshape"
+ input: "reshape_out"
+ input: "shape2"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if Redundant Reshape is removed.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 1
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 1 dim: 1 }
+}
+operand {
+ name: "t1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operation {
+ type: "Squeeze"
+ squeeze_options { squeeze_dim: 3 }
+ input: "ifm"
+ output: "t1"
+}
+operation {
+ type: "Squeeze"
+ squeeze_options { squeeze_dim: 2 }
+ input: "t1"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if Squeeze is substituted with Reshape op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "SQUEEZE_COUNT" $(op_count SQUEEZE) '=' 0
+RULE "RESHAPE_COUNT" $(op_count RESHAPE) '=' 2
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 4 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "0" arg: "0" }
+}
+operand {
+ name: "end"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "4" }
+}
+operand {
+ name: "strides"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "1" arg: "1" }
+}
+operand {
+ name: "output_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 4 }
+}
+operation {
+ type: "StridedSlice"
+ strided_slice_options {
+ begin_mask: 0
+ end_mask: 0
+ ellipsis_mask: 0
+ new_axis_mask: 0
+ shrink_axis_mask: 0
+ }
+ input: "ifm"
+ input: "begin"
+ input: "end"
+ input: "strides"
+ output: "output_1"
+}
+operand {
+ name: "begin_2"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "0" arg: "0" }
+}
+operand {
+ name: "end_2"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "1" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+  shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "StridedSlice"
+ strided_slice_options {
+ begin_mask: 5
+ end_mask: 5
+ ellipsis_mask: 0
+ new_axis_mask: 0
+ shrink_axis_mask: 2
+ }
+ input: "output_1"
+ input: "begin_2"
+ input: "end_2"
+ input: "strides"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if Unnecessary StridedSlice is removed.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "STRIDEDSLICE_EXIST" $(op_count STRIDEDSLICE) '=' 1
--- /dev/null
+# Tconv with asymmetric filter + BN + Relu6
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 2
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "5"
+ arg: "1"
+ arg: "2"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "FusedBatchNormV3"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ arg: "-7.80109"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ arg: "1.00344"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "Relu6"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+ input: "Hole"
+ output: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+}
+operation {
+ type: "Mul"
+ input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3"
+ output: "Relu6"
+ add_options {
+ activation: RELU6
+ }
+}
+input: "Hole"
+output: "Relu6"
--- /dev/null
+# To check if BatchNorm op (mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "add"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "ofm1"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "ofm2"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm1"
+output: "ofm2"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "add"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "sqrt1"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "sqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt1"
+ output: "ofm1"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt2"
+ output: "ofm2"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm1"
+output: "ofm2"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm4"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "add1"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "add1"
+ input: "ifm3"
+ output: "add2"
+}
+operation {
+ type: "Sub"
+ sub_options {
+ activation: NONE
+ }
+ input: "add2"
+ input: "ifm4"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+input: "ifm3"
+input: "ifm4"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "sqrt"
+ output: "sqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt2"
+ output: "rsqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "rsqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "ofm1"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "ofm2"
+}
+input: "ifm"
+output: "ofm1"
+output: "ofm2"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "sqrt"
+ input: "rsqrt2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt2"
+ output: "rsqrt3"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "sqrt"
+ input: "rsqrt3"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt4"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "rsqrt3"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt2"
+ output: "rsqrt4"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "rsqrt3"
+ input: "rsqrt4"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm1"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "ifm2"
+ output: "sqrt"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "rsqrt"
+ input: "sqrt"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "rsqrt"
+ input: "sqrt"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 2 dim: 3 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "-1" arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "1" arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+}
+operation {
+ type: "Slice"
+ input: "ifm"
+ input: "begin"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 5 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 4 dim: 5 }
+}
+operation {
+ type: "Squeeze"
+ squeeze_options { }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole")
+
+op_uni_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True)
+op_bidi_ = tf.compat.v1.keras.layers.Bidirectional(op_uni_)(in_)
--- /dev/null
+import tensorflow as tf
+import numpy as np
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
+
+filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
+strides = (1, 2, 2, 1)
+cv_ = tf.compat.v1.nn.conv2d(in_, filters, strides, "VALID", data_format="NHWC")
+
+op_ = tf.compat.v1.fake_quant_with_min_max_vars(cv_, 0.0, 1.0, 8, False)
+'''
+NOTE:
+'fake_quant_with_min_max_vars' is converted to QUANTIZE-DEQUANTIZE in tflite.
+To produce a tflite model with the FAKE_QUANT Op, you need to change tf2tfliteV2.py with
+
+converter.experimental_new_converter = False
+
+and then run
+
+python3 ../../compiler/tf2tfliteV2/tf2tfliteV2.py --v2 --graph_def \
+-i ./fake_quant_with_min_max_vars.pbtxt \
+-o ./fake_quant_with_min_max_vars.tflite \
+-I Hole \
+-O FakeQuantWithMinMaxVars
+'''
--- /dev/null
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 16, 160, 160), name="Hole")
+
+upper_ = tf.compat.v1.constant(6.)
+lower_ = tf.compat.v1.constant(0.)
+
+min_ = tf.compat.v1.minimum(in_, upper_)
+max_ = tf.compat.v1.maximum(min_, lower_)
+'''
+python ../../compiler/tf2tfliteV2/tf2tfliteV2.py --v1 \
+-i minimum-maximum.pbtxt \
+-o minimum-maximum.tflite \
+-I Hole -O Maximum
+'''
+#!/usr/bin/env python
+
# TensorFlow Python Example Manager
import tensorflow as tf
+++ /dev/null
-../../.clang-format.8
\ No newline at end of file
$(error ONERT_PREBUILT_LIB_DIR is not set)
endif
-# libcircle_loader
-include $(CLEAR_VARS)
-LOCAL_MODULE := circle_loader
-PREBUILT_LIB += circle_loader
-LOCAL_SRC_FILES := \
- $(ONERT_PREBUILT_LIB_DIR)/libcircle_loader.so
-include $(PREBUILT_SHARED_LIBRARY)
-
-# libtflite_loader
-include $(CLEAR_VARS)
-LOCAL_MODULE := tflite_loader
-PREBUILT_LIB += tflite_loader
-LOCAL_SRC_FILES := \
- $(ONERT_PREBUILT_LIB_DIR)/libtflite_loader.so
-include $(PREBUILT_SHARED_LIBRARY)
-
# libnnfw
include $(CLEAR_VARS)
LOCAL_MODULE := nnfw-dev
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.12.0"
+ versionName "1.15.0"
externalNativeBuild {
ndkBuild {
target_link_libraries(style_transfer_app onert_core onert tflite_loader)
target_link_libraries(style_transfer_app tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
target_link_libraries(style_transfer_app nnfw-dev)
-target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
+target_link_libraries(tflite_comparator ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
if(JPEG_FOUND)
target_link_libraries(style_transfer_app ${JPEG_LIBRARIES})
endif(JPEG_FOUND)
+++ /dev/null
-../../.clang-format.8
\ No newline at end of file
sleep_time.tv_nsec = micros * 1e3;
nanosleep(&sleep_time, nullptr);
}
-}
+} // namespace
namespace benchmark
{
std::normal_distribution<float> _dist;
};
+template <> int8_t RandomGenerator::generate<int8_t>(void);
template <> uint8_t RandomGenerator::generate<uint8_t>(void);
template <> bool RandomGenerator::generate<bool>(void);
template <> int32_t RandomGenerator::generate<int32_t>(void);
namespace misc
{
+template <> int8_t RandomGenerator::generate<int8_t>(void)
+{
+ // The value of type_range is 255.
+ float type_range = static_cast<float>(std::numeric_limits<int8_t>::max()) -
+ static_cast<float>(std::numeric_limits<int8_t>::min());
+ // Most _dist values range from -5.0 to 5.0.
+ float min_range = -5.0f;
+ float max_range = 5.0f;
+  // NOTE shifted_relative_val follows a Gaussian distribution whose original mean was 0 and
+  // standard deviation was 2. Its values are then scaled and shifted so that the mean becomes
+  // 127.5 and the range is about [0, 255].
+ float shifted_relative_val = (_dist(_rand) - min_range) * type_range / (max_range - min_range);
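+  // For example, a _dist sample of 0.0 maps to (0.0 - (-5.0)) * 255 / 10 = 127.5, and a sample
+  // of -5.0 maps to 0.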
+
+  // If shifted_relative_val falls outside the range, it is clamped to the corresponding end
+  // point.
+ if (shifted_relative_val < -128.0f)
+ {
+ return -128;
+ }
+ else if (shifted_relative_val > type_range)
+ {
+ return 127;
+ }
+
+ // Convert shifted_relative_val from float to int8
+ return static_cast<int8_t>(shifted_relative_val);
+}
+
template <> uint8_t RandomGenerator::generate<uint8_t>(void)
{
// The value of type_range is 255.
set_property(TARGET nnfw_lib_profiling PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(nnfw_lib_profiling PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(nnfw_lib_profiling PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_profiling PRIVATE nnfw_coverage)
target_link_libraries(nnfw_lib_rua_anchor PUBLIC nnfw_lib_rua_core)
target_link_libraries(nnfw_lib_rua_anchor PRIVATE nnfw_lib_rua_dyn)
target_link_libraries(nnfw_lib_rua_anchor PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_rua_anchor PRIVATE nnfw_coverage)
target_include_directories(nnfw_lib_rua_dyn PUBLIC include)
target_link_libraries(nnfw_lib_rua_dyn PUBLIC nnfw_lib_rua_core)
target_link_libraries(nnfw_lib_rua_dyn PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_rua_dyn PRIVATE nnfw_coverage)
target_link_libraries(nnfw_lib_tflite PUBLIC nnfw_lib_misc)
target_link_libraries(nnfw_lib_tflite PRIVATE ${LIB_PTHREAD} dl)
target_link_libraries(nnfw_lib_tflite PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_tflite PRIVATE nnfw_coverage)
if(NOT ENABLE_TEST)
return()
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
+#define __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
+
+#include <tensorflow/lite/interpreter.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+class CopyInputInitializer
+{
+public:
+ CopyInputInitializer(::tflite::Interpreter &from) : _from{from}
+ {
+ // DO NOTHING
+ }
+
+ void run(::tflite::Interpreter &interp);
+
+private:
+ template <typename T> void setValue(::tflite::Interpreter &interp, int tensor_idx);
+
+private:
+ ::tflite::Interpreter &_from;
+};
+
+} // namespace tflite
+} // namespace nnfw
+
+#endif // __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_TFLITE_OUTPUT_RESETTER_H__
+#define __NNFW_TFLITE_OUTPUT_RESETTER_H__
+
+#include <tensorflow/lite/interpreter.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+class OutputResetter
+{
+public:
+ OutputResetter()
+ {
+ // DO NOTHING
+ }
+
+ void run(::tflite::Interpreter &interp);
+
+private:
+ template <typename T> void resetValue(::tflite::Interpreter &interp, int tensor_idx);
+};
+
+} // namespace tflite
+} // namespace nnfw
+
+#endif // __NNFW_TFLITE_OUTPUT_RESETTER_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_TFLITE_RANDOM_INPUT_INITIALIZER_H__
+#define __NNFW_TFLITE_RANDOM_INPUT_INITIALIZER_H__
+
+#include <misc/RandomGenerator.h>
+
+#include <tensorflow/lite/interpreter.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+class RandomInputInitializer
+{
+public:
+ RandomInputInitializer(misc::RandomGenerator &randgen) : _randgen{randgen}
+ {
+ // DO NOTHING
+ }
+
+ void run(::tflite::Interpreter &interp);
+
+private:
+ template <typename T> void setValue(::tflite::Interpreter &interp, int tensor_idx);
+
+private:
+ nnfw::misc::RandomGenerator &_randgen;
+};
+
+} // namespace tflite
+} // namespace nnfw
+
+#endif // __NNFW_TFLITE_RANDOM_INPUT_INITIALIZER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorShapeUtils.h
- * @brief This file contains utilities function of tensor shape
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
-#define __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
-
-#include "misc/tensor/Shape.h"
-
-#include <vector>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Converts tensor::Shape into a vector
- * @param[in] shape The tensor shape to be converted
- * @return vector value of given shape object
- */
-static inline std::vector<int32_t> as_dims(const nnfw::misc::tensor::Shape &shape)
-{
- std::vector<int32_t> dims;
-
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- dims.emplace_back(shape.dim(axis));
- }
-
- return dims;
-}
-
-/**
- * @brief Broadcasts between two given shapes
- * @param[in] lhs_shape The left hand side shape
- * @param[in] rhs_shape The right hand side shape
- * @return The broadcasted shape
- */
-nnfw::misc::tensor::Shape broadcast(const nnfw::misc::tensor::Shape &lhs_shape,
- const nnfw::misc::tensor::Shape &rhs_shape);
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/CopyInputInitializer.h"
+#include "tflite/TensorView.h"
+
+#include <misc/tensor/IndexIterator.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+void CopyInputInitializer::run(::tflite::Interpreter &interp)
+{
+ for (const auto &tensor_idx : interp.inputs())
+ {
+ TfLiteTensor *tensor = interp.tensor(tensor_idx);
+ switch (tensor->type)
+ {
+ case kTfLiteInt32:
+ setValue<int32_t>(interp, tensor_idx);
+ break;
+ case kTfLiteUInt8:
+ setValue<uint8_t>(interp, tensor_idx);
+ break;
+ case kTfLiteInt8:
+ setValue<int8_t>(interp, tensor_idx);
+ break;
+ case kTfLiteBool:
+ setValue<bool>(interp, tensor_idx);
+ break;
+ case kTfLiteFloat32:
+ setValue<float>(interp, tensor_idx);
+ break;
+ default:
+ throw std::runtime_error{"Not supported input type"};
+ }
+ }
+}
+
+template <typename T>
+void CopyInputInitializer::setValue(::tflite::Interpreter &interp, int tensor_idx)
+{
+ auto tensor_from_view = nnfw::tflite::TensorView<T>::make(_from, tensor_idx);
+ auto tensor_to_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
+
+ nnfw::misc::tensor::iterate(tensor_from_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ tensor_to_view.at(ind) = tensor_from_view.at(ind);
+ };
+}
+
+} // namespace tflite
+} // namespace nnfw
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/OutputResetter.h"
+#include "tflite/TensorView.h"
+
+#include <misc/tensor/IndexIterator.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+void OutputResetter::run(::tflite::Interpreter &interp)
+{
+ for (const auto &tensor_idx : interp.outputs())
+ {
+ TfLiteTensor *tensor = interp.tensor(tensor_idx);
+ switch (tensor->type)
+ {
+ case kTfLiteInt32:
+ resetValue<int32_t>(interp, tensor_idx);
+ break;
+ case kTfLiteUInt8:
+ resetValue<uint8_t>(interp, tensor_idx);
+ break;
+ case kTfLiteInt8:
+ resetValue<int8_t>(interp, tensor_idx);
+ break;
+ case kTfLiteBool:
+ resetValue<bool>(interp, tensor_idx);
+ break;
+ case kTfLiteFloat32:
+ resetValue<float>(interp, tensor_idx);
+ break;
+ default:
+ throw std::runtime_error{"Not supported output type"};
+ }
+ }
+}
+
+template <typename T> void OutputResetter::resetValue(::tflite::Interpreter &interp, int tensor_idx)
+{
+ auto tensor_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
+
+ nnfw::misc::tensor::iterate(tensor_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) { tensor_view.at(ind) = 0; };
+}
+
+} // namespace tflite
+} // namespace nnfw
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/RandomInputInitializer.h"
+#include "tflite/TensorView.h"
+
+#include <misc/tensor/IndexIterator.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+void RandomInputInitializer::run(::tflite::Interpreter &interp)
+{
+ for (const auto &tensor_idx : interp.inputs())
+ {
+ TfLiteTensor *tensor = interp.tensor(tensor_idx);
+ switch (tensor->type)
+ {
+ case kTfLiteFloat32:
+ setValue<float>(interp, tensor_idx);
+ break;
+ case kTfLiteInt32:
+ setValue<int32_t>(interp, tensor_idx);
+ break;
+ case kTfLiteUInt8:
+ setValue<uint8_t>(interp, tensor_idx);
+ break;
+ case kTfLiteBool:
+ setValue<bool>(interp, tensor_idx);
+ break;
+ case kTfLiteInt8:
+ setValue<int8_t>(interp, tensor_idx);
+ break;
+ default:
+ throw std::runtime_error{"Not supported input type"};
+ }
+ }
+}
+
+template <typename T>
+void RandomInputInitializer::setValue(::tflite::Interpreter &interp, int tensor_idx)
+{
+ auto tensor_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
+
+ nnfw::misc::tensor::iterate(tensor_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) { tensor_view.at(ind) = _randgen.generate<T>(); };
+}
+
+} // namespace tflite
+} // namespace nnfw
* limitations under the License.
*/
+#include "tflite/CopyInputInitializer.h"
+#include "tflite/OutputResetter.h"
+#include "tflite/RandomInputInitializer.h"
#include "tflite/RandomTestRunner.h"
#include "tflite/Diff.h"
#include "tflite/TensorLogger.h"
_nnapi = builder.build();
_tfl_interp->UseNNAPI(false);
+ _nnapi->UseNNAPI(true);
// Allocate Tensors
_tfl_interp->AllocateTensors();
_nnapi->AllocateTensors();
+}
+int RandomTestRunner::run(size_t running_count)
+{
assert(_tfl_interp->inputs() == _nnapi->inputs());
+ assert(_tfl_interp->outputs() == _nnapi->outputs());
- using ::tflite::Interpreter;
- using Initializer = std::function<void(int id, Interpreter *, Interpreter *)>;
-
- std::map<TfLiteType, Initializer> initializers;
- std::map<TfLiteType, Initializer> reseters;
-
- // Generate singed 32-bit integer (s32) input
- initializers[kTfLiteInt32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteInt32);
- assert(_nnapi->tensor(id)->type == kTfLiteInt32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<int32_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<int32_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- ++value;
- };
- };
-
- // Generate singed 32-bit integer (s32) input
- reseters[kTfLiteInt32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteInt32);
- assert(_nnapi->tensor(id)->type == kTfLiteInt32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<int32_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<int32_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- initializers[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteUInt8);
- assert(_nnapi->tensor(id)->type == kTfLiteUInt8);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<uint8_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<uint8_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
- const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
- assert(tfl_interp_view.shape() == data.shape());
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
-
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- reseters[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteUInt8);
- assert(_nnapi->tensor(id)->type == kTfLiteUInt8);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<uint8_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<uint8_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
- const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
- assert(tfl_interp_view.shape() == data.shape());
-
- uint8_t value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- initializers[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteFloat32);
- assert(_nnapi->tensor(id)->type == kTfLiteFloat32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<float>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<float>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<float (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<float>);
- const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
-
- assert(tfl_interp_view.shape() == data.shape());
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
-
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- reseters[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteFloat32);
- assert(_nnapi->tensor(id)->type == kTfLiteFloat32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<float>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<float>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<float (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<float>);
- const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
+ nnfw::tflite::OutputResetter resetter;
+ resetter.run(*(_tfl_interp.get()));
- assert(tfl_interp_view.shape() == data.shape());
+ RandomInputInitializer initializer{_randgen};
+ initializer.run(*(_tfl_interp.get()));
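+  // At this point the T/F Lite interpreter has zeroed outputs and randomized inputs; inside the
+  // loop below the NNAPI interpreter is reset the same way and its inputs are copied from the
+  // T/F Lite interpreter.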
- float value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- initializers[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteBool);
- assert(_nnapi->tensor(id)->type == kTfLiteBool);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<bool>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<bool>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
-
- assert(tfl_interp_view.shape() == data.shape());
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
-
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- reseters[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteBool);
- assert(_nnapi->tensor(id)->type == kTfLiteBool);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<bool>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<bool>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
-
- assert(tfl_interp_view.shape() == data.shape());
-
- bool value = false;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- // Fill IFM with random numbers
- for (const auto id : _tfl_interp->inputs())
- {
- assert(_tfl_interp->tensor(id)->type == _nnapi->tensor(id)->type);
-
- auto it = initializers.find(_tfl_interp->tensor(id)->type);
-
- if (it == initializers.end())
- {
- throw std::runtime_error{"Not supported input type"};
- }
-
- it->second(id, _tfl_interp.get(), _nnapi.get());
- }
-
- // Fill OFM with 0
- for (const auto id : _tfl_interp->outputs())
- {
- assert(_tfl_interp->tensor(id)->type == _nnapi->tensor(id)->type);
-
- auto it = reseters.find(_tfl_interp->tensor(id)->type);
-
- if (it == reseters.end())
- {
- throw std::runtime_error{"Not supported input type"};
- }
-
- it->second(id, _tfl_interp.get(), _nnapi.get());
- }
-}
-
-int RandomTestRunner::run(size_t running_count)
-{
std::cout << "[NNAPI TEST] Run T/F Lite Interpreter without NNAPI" << std::endl;
_tfl_interp->Invoke();
for (size_t i = 1; i <= running_count; ++i)
{
+ resetter.run(*(_nnapi.get()));
+
+ CopyInputInitializer copy_initializer{*(_tfl_interp.get())};
+ copy_initializer.run(*(_nnapi.get()));
+
std::cout << "[NNAPI TEST #" << i << "] Run T/F Lite Interpreter with NNAPI" << std::endl;
char *env = getenv("UPSTREAM_DELEGATE");
if (env && !std::string(env).compare("1"))
{
- _nnapi->UseNNAPI(true);
_nnapi->Invoke();
}
else
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the License);
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/TensorShapeUtils.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-nnfw::misc::tensor::Shape broadcast(const nnfw::misc::tensor::Shape &lhs_shape,
- const nnfw::misc::tensor::Shape &rhs_shape)
-{
- const uint32_t lhs_rank = lhs_shape.rank();
- const uint32_t rhs_rank = rhs_shape.rank();
- const uint32_t out_rank = std::max(lhs_rank, rhs_rank);
- const uint32_t lhs_rank_diff = out_rank - lhs_rank;
- const uint32_t rhs_rank_diff = out_rank - rhs_rank;
-
- nnfw::misc::tensor::Shape out_shape(out_rank);
-
- for (uint32_t axis = 0; axis < out_rank; ++axis)
- {
- out_shape.dim(axis) = std::max(axis < lhs_rank_diff ? 1 : lhs_shape.dim(axis - lhs_rank_diff),
- axis < rhs_rank_diff ? 1 : rhs_shape.dim(axis - rhs_rank_diff));
- }
-
- return out_shape;
-}
-
-} // namespace tflite
-} // namespace nnfw
/**
* @brief Extended operation types
*/
-typedef enum {
+typedef enum
+{
/** extends operation. */
/**
+++ /dev/null
-../../../.clang-format.8
\ No newline at end of file
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01000c00
+#define NNFW_VERSION 0x01000f00
#endif // __NNFW_VERSION_H__
namespace onert
{
-namespace frontend
-{
-namespace custom
+namespace api
{
using namespace backend::custom;
}
};
-Kernel::Kernel(const nnfw_custom_eval evalFunction)
+CustomKernel::CustomKernel(const nnfw_custom_eval evalFunction)
: _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
{
}
-void Kernel::configure(CustomKernelConfigParams &&inParams)
+void CustomKernel::configure(CustomKernelConfigParams &&inParams)
{
_userdata = inParams.userdata;
_userdata_size = inParams.userdata_size;
_in_params = std::move(inParams);
}
-void Kernel::run()
+void CustomKernel::run()
{
nnfw_custom_kernel_params params;
delete[] params.outputs;
}
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_H__
-#define __ONERT_BACKEND_CUSTOM_KERNEL_H__
+#ifndef __ONERT_API_CUSTOM_KERNEL_H__
+#define __ONERT_API_CUSTOM_KERNEL_H__
#include "nnfw_experimental.h"
namespace onert
{
-namespace frontend
-{
-namespace custom
+namespace api
{
-class Kernel : public ::onert::exec::IFunction
+class CustomKernel : public ::onert::exec::IFunction
{
public:
- explicit Kernel(nnfw_custom_eval evalFunction);
+ explicit CustomKernel(nnfw_custom_eval evalFunction);
backend::custom::CustomKernelConfigParams _in_params;
void run() override;
};
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
-#endif // __ONERT_BACKEND_CUSTOM_KERNEL_H__
+#endif // __ONERT_API_CUSTOM_KERNEL_H__
namespace onert
{
-namespace frontend
+namespace api
{
-namespace custom
+
+class KernelBuilder : public backend::custom::IKernelBuilder
{
+public:
+ KernelBuilder(CustomKernelRegistry *registry) : _registry(registry) {}
+
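+  // Builds a kernel by looking up the eval function registered for the given op id and
+  // configuring it with the given params.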
+ std::unique_ptr<exec::IFunction>
+ buildKernel(const std::string &id,
+ backend::custom::CustomKernelConfigParams &¶ms) const override
+ {
+ auto kernel = _registry->buildKernelForOp(id);
+ kernel->configure(std::move(params));
+
+ return kernel;
+ }
+
+private:
+ CustomKernelRegistry *_registry;
+};
-void KernelRegistry::registerKernel(const std::string &id, nnfw_custom_eval evalFunction)
+void CustomKernelRegistry::registerKernel(const std::string &id, nnfw_custom_eval evalFunction)
{
_storage.emplace(id, evalFunction);
}
-std::shared_ptr<backend::custom::IKernelBuilder> KernelRegistry::getBuilder()
+std::shared_ptr<backend::custom::IKernelBuilder> CustomKernelRegistry::getBuilder()
{
return std::make_unique<KernelBuilder>(this);
}
-std::unique_ptr<Kernel> KernelRegistry::buildKernelForOp(const std::string &id)
+std::unique_ptr<CustomKernel> CustomKernelRegistry::buildKernelForOp(const std::string &id)
{
auto it = _storage.find(id);
if (it == _storage.end())
throw std::runtime_error("Unable to find associated kernel for op");
}
- return std::make_unique<Kernel>(it->second);
+ return std::make_unique<CustomKernel>(it->second);
}
-// Kernel builder
-std::unique_ptr<exec::IFunction>
-KernelBuilder::buildKernel(const std::string &id,
- backend::custom::CustomKernelConfigParams &¶ms) const
-{
- auto kernel = _registry->buildKernelForOp(id);
- kernel->configure(std::move(params));
-
- return kernel;
-}
-
-KernelBuilder::KernelBuilder(KernelRegistry *registry) : _registry(registry) {}
-
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
-#define __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
+#ifndef __ONERT_API_CUSTOM_KERNEL_REGISTRY_H__
+#define __ONERT_API_CUSTOM_KERNEL_REGISTRY_H__
#include "CustomKernel.h"
namespace onert
{
-namespace frontend
-{
-namespace custom
+namespace api
{
-class KernelRegistry
+class CustomKernelRegistry
{
public:
void registerKernel(const std::string &id, nnfw_custom_eval evalFunction);
std::shared_ptr<backend::custom::IKernelBuilder> getBuilder();
- std::unique_ptr<Kernel> buildKernelForOp(const std::string &id);
+ std::unique_ptr<CustomKernel> buildKernelForOp(const std::string &id);
private:
std::unordered_map<std::string, nnfw_custom_eval> _storage;
};
-class KernelBuilder : public backend::custom::IKernelBuilder
-{
-public:
- KernelBuilder(KernelRegistry *registry);
-
- std::unique_ptr<exec::IFunction>
- buildKernel(const std::string &id,
- backend::custom::CustomKernelConfigParams &¶ms) const override;
-
-private:
- KernelRegistry *_registry;
-};
-
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
-#endif // __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
+#endif // __ONERT_API_CUSTOM_KERNEL_REGISTRY_H__
nnfw_session::nnfw_session()
: _subgraphs{nullptr}, _execution{nullptr},
- _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}, _tracing_ctx{
- nullptr}
+ _kernel_registry{std::make_shared<onert::api::CustomKernelRegistry>()}, _tracing_ctx{nullptr}
{
// DO NOTHING
}
try
{
- std::string manifest_file_name(package_dir);
- manifest_file_name += "/metadata/MANIFEST";
+ std::string package_path(package_dir);
+ std::string manifest_file_name = package_path + "/metadata/MANIFEST";
std::ifstream mfs(manifest_file_name);
// extract the filename of the first(index 0) model
if (!configs.empty() && !configs[0].empty())
{
- auto filepath = package_dir + std::string("/metadata/") + configs[0].asCString();
+ auto filepath = package_path + std::string("/metadata/") + configs[0].asString();
CfgKeyValues keyValues;
if (loadConfigure(filepath, keyValues))
}
}
- auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model
+ auto model_file_path = package_path + std::string("/") + models[0].asString(); // first model
auto model_type = model_types[0].asString(); // first model's type
if (model_type == "tflite")
{
- _subgraphs = onert::tflite_loader::loadModel(model_file_path.c_str());
+ _subgraphs = onert::tflite_loader::loadModel(model_file_path);
}
else if (model_type == "circle")
{
- _subgraphs = onert::circle_loader::loadModel(model_file_path.c_str());
+ _subgraphs = onert::circle_loader::loadModel(model_file_path);
}
else
{
return NNFW_STATUS_INVALID_STATE;
}
- if (!_subgraphs || !primary_subgraph() || primary_subgraph()->isBuildingPhase())
- {
- std::cerr << "Error during model prepare : "
- << "prepare should be run after load_model" << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
_subgraphs.reset();
{
// In this case, if we apply input shape in primary_subgraph, it will propagate after
    // compilation and execution
- auto ind = primary_subgraph()->getInputs().at(index);
- auto &input = primary_subgraph()->operands().at(ind);
+ auto primary_subgraph = _subgraphs->primary();
+ auto ind = primary_subgraph->getInputs().at(index);
+ auto &input = primary_subgraph->operands().at(ind);
// overwrite input shape with the shape from ti
input.info().shape(new_shape);
{
options.graph_dump_level = toInt(value);
}
- else if (skey == config::OP_SEQ_MAX_NODE)
- {
- options.op_seq_max_node = toInt(value);
- }
else if (skey == config::EXECUTOR)
{
options.executor = value;
return NNFW_STATUS_NO_ERROR;
}
-onert::ir::Graph *nnfw_session::primary_subgraph()
+const onert::ir::Graph *nnfw_session::primary_subgraph()
{
if (_subgraphs)
{
assert(_execution);
// TODO Remove const_cast
// We assumed the graph will not change after compilation, but shape could change
- return const_cast<onert::ir::Graph *>(&_execution->primary_subgraph());
+ return &_execution->primary_subgraph();
}
}
assert(_subgraphs);
assert(_compiler);
assert(!_execution);
- assert(!primary_subgraph()->isBuildingPhase());
return true;
}
else
assert(!_subgraphs);
assert(_compiler);
assert(_execution);
- assert(!primary_subgraph()->isBuildingPhase());
return true;
}
else
assert(!_subgraphs);
assert(_compiler);
assert(_execution);
- assert(!primary_subgraph()->isBuildingPhase());
return true;
}
return false;
assert(!_subgraphs);
assert(_compiler);
assert(_execution);
- assert(!primary_subgraph()->isBuildingPhase());
return true;
}
else
namespace onert
{
-namespace frontend
+namespace api
{
-namespace custom
-{
-class KernelRegistry;
-}
-} // namespace frontend
+class CustomKernelRegistry;
+} // namespace api
namespace exec
{
class Execution;
NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index);
private:
- onert::ir::Graph *primary_subgraph();
+ const onert::ir::Graph *primary_subgraph();
bool isStateInitialized();
bool isStateModelLoaded();
bool isStatePrepared();
std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
std::unique_ptr<onert::compiler::Compiler> _compiler;
std::unique_ptr<onert::exec::Execution> _execution;
- std::shared_ptr<onert::frontend::custom::KernelRegistry> _kernel_registry;
+ std::shared_ptr<onert::api::CustomKernelRegistry> _kernel_registry;
std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
};
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<acl_cl::BackendContext>(this, &graph);
- auto tm = createTensorManager(is_linear_executor);
+ const auto &graph = *data.graph;
+ const auto &operands = data.graph->operands();
+    // Create the tensor manager before data is moved into the backend context
+    auto tm = createTensorManager(data.is_linear_executor);
+    auto context = std::make_unique<acl_cl::BackendContext>(this, std::move(data));
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr);
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
void BackendContext::initConsts()
{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
+ _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
+ constant_initializer->setLayout(graph()->layout());
+ op.accept(*constant_initializer);
+ });
- for (auto ind : operand_list())
- {
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (_data.external_operands.contains(ind) || !operand.isConstant())
+ return;
const auto &obj = graph()->operands().at(ind);
if (obj.isConstant() && !constant_initializer->exist(ind))
{
constant_initializer->registerDefaultInitializer(ind, obj);
}
- }
+ });
constant_initializer->run();
}
-void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+void BackendContext::planTensors()
{
ir::OperandIndexMap<uint32_t> uses_map;
ir::OperandIndexMap<uint32_t> def_map;
ir::OperandIndexSequence constants;
// Prepare scanning
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- const auto &li = lower_info.operand.at(ind);
- if (li->def_factors().getOnlyElement().backend() != backend())
- continue;
-
- // Ignore unused tensor
- if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
- {
- VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (_data.external_operands.contains(ind))
return;
- }
uses_map[ind] = obj.getUses().size();
def_map[ind] = obj.getDef().valid() ? 1 : 0;
if (obj.isConstant())
constants.append(ind);
- auto factor = li->def_factors().getOnlyElement();
if (!tensor_builder->isRegistered(ind))
{
- // These tensors do not exist in any op_seq (No use and def)
+ // These tensors do not exist in any operation (No use and def)
const auto info = obj.info();
- const auto backend_layout = factor.layout();
+ const auto layout = _data.operand_layouts.at(ind);
// TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ tensor_builder->registerTensorInfo(ind, info, layout);
}
- }
+ });
// Start scanning to do notify{First|Last}Use for each tensor
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_seq_ind : order)
+ for (const auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- for (const auto &op_idx : op_seq.operations())
- {
- auto &op = graph()->operations().at(op_idx);
- auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
- auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ const auto &op = graph()->operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
- // Define outputs
- for (const auto &ind : op_outputs)
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder->notifyFirstUse(ind);
- }
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
}
+ }
- // Scan variable tensors
- // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
- // non-constant because of less memory usage by memory planning in here
- for (const auto &ind : op_inputs)
+ // Scan variable tensors
+ // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+ // non-constant so that the memory planning here uses less memory
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- const auto &operand = graph()->operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
- lower_info.operand.at(ind)->use_factors().size() == 1);
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder->notifyFirstUse(ind);
- }
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
}
+ }
- for (const auto &ind : op_inputs)
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder->notifyLastUse(ind);
- }
+ // plan for deallocation of static tensor node
+ tensor_builder->notifyLastUse(ind);
}
}
}
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (uses_map[ind] == 0)
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ });
+
// Dispose and validate
for (const auto &ind : constants)
{
}
assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}
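// [Editor's note] Worked example of the counting above, for illustration: a tensor T produced by
// op A and consumed by ops B and C starts with def_map[T] = 1 and uses_map[T] = 2. Visiting A
// clears def_map[T] and calls notifyFirstUse(T), making the buffer live; B and C each decrement
// uses_map[T], and the decrement that reaches 0 calls notifyLastUse(T) so the static planner can
// reuse T's region afterwards. The final iterate() sweeps operands whose use count is still 0
// (never consumed inside the loop) so they are marked for deallocation as well.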
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
+ITensorRegistry *BackendContext::genTensors()
{
optimizer->optimize();
- for (const auto op_seq_ind : order)
- {
- const auto &op_seq = op_seqs.at(op_seq_ind);
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (const auto op_ind : op_seq)
- {
- bool op_assigned = [&]() {
- for (auto &op_info : operation_list())
- if (op_info.index == op_ind)
- return true;
- return false;
- }();
- if (!op_assigned)
- continue;
+ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (external_operands().contains(ind))
+ return;
- const auto &op = graph()->operations().at(op_ind);
- for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
- {
- if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
- find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
- {
- const auto &operand_lower_info =
- lower_info.operand.at(index)->def_factors().getOnlyElement();
-
- // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
- // op.getOutputs() of permute (CPU) returns tensor A
- // but tensor A belongs to the backend of acl_cl.
- // So, we have to make this tensor NOT registered for CPU.
- if (operand_lower_info.backend() != backend())
- continue;
-
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = op_seq.getLayout();
- const auto backend_layout = operand_lower_info.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
- }
- }
- }
+ const auto frontend_layout = graph()->layout();
+ const auto backend_layout = operand_layouts().at(ind);
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
+ });
// TODO Get compiler options from the compiler and use them rather than reading them from Env
if (util::getConfigString(util::config::EXECUTOR) == "Linear")
{
- planTensors(order, op_seqs, lower_info);
+ planTensors();
}
else
{
// For executors that do not have a fixed linear execution order:
// as a workaround, use the static memory planner but never deallocate any tensor
- for (auto ind : operand_list())
- {
+ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
if (tensor_builder->isRegistered(ind))
tensor_builder->notifyFirstUse(ind);
- }
+ });
}
tensor_builder->prepare();
return tensor_registry.get();
}
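// [Editor's note] Aside on the branch above: only the "Linear" executor has a fixed operation
// order known at compile time, so only then can planTensors() precompute first/last uses. For the
// other executors (dataflow/parallel), every registered tensor is marked as in use up front and
// never released, trading higher memory usage for correctness under a non-deterministic order.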
-FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
+FunctionMap BackendContext::genKernels()
{
FunctionMap ret;
- for (auto op_seq_ind : order)
+ for (auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
}
tensor_builder->allocate();
initConsts();
// NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
for (auto &it : ret)
{
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
+ kernel_gen}
{
}
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
- FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
private:
void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+ void planTensors();
public:
std::shared_ptr<TensorBuilder> tensor_builder;
const cl_event *event_wait_list, cl_event *usr_event) {
cl_event event;
cl_int enqueue_res =
- this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws,
- num_events_in_wait_list, event_wait_list, &event);
+ this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws,
+ num_events_in_wait_list, event_wait_list, &event);
this->_measured_events.emplace_back(event);
// According to the spec, if NULL was provided in usr_event, an event shouldn't be returned
if ((props & CL_QUEUE_PROFILING_ENABLE) == 0)
{
cl_scheduler.set_queue(
- cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE));
+ cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE));
}
};
// NOTE CLKernelLibraryEx must use the same context as CLScheduler
// It does not check whether another device is available.
arm_compute::CLKernelLibraryEx::get().init(
- "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault());
+ "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault());
return true;
}
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : acl_common::AclConstantInitializer{operands, tensor_reg}
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
const auto &shape = model_obj.shape();
const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
assert(model_obj.shape().rank() == 2);
- assert(obj.dimension(0) == 2);
+ assert(obj.getShape().dim(0) == 2);
obj.access([&](ITensor &tensor) {
for (auto i = 0; i < shape.dim(0); ++i)
{
{
const int32_t value = base[i * 2 + j];
int32_t *into = reinterpret_cast<int32_t *>(
- tensor.buffer() + tensor.calcOffset({shape.dim(0) - i - 1, j}));
+ tensor.buffer() + tensor.calcOffset({shape.dim(0) - i - 1, j}));
*into = value;
}
}
}
auto axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_tmp, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(ifm_rank, axis_tmp, frontend_layout, backend_layout).value();
obj.access([&](ITensor &tensor) {
int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer());
using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
- ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
+ ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
- : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
+ : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
+ _operations_ctx(graph.operations()), _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg(tensor_reg)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- // TODO Move this to IKernelGenerator
- // (all derivatives have the same implementation for this)
- assert(!_return_fn_seq);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
- _return_fn_seq->enableDynamicShapeInferer(false);
-
- _current_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
- {
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
- }
+ auto ret = std::make_unique<exec::FunctionSequence>();
+ ret->enableDynamicShapeInferer(false);
+
+ const auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ ret->append(releaseFunction());
+ return ret;
}
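// [Editor's note] Illustration of the API change: visit(ir::OpSequence) used to emit one
// FunctionSequence per operation sequence, whereas generate(ir::OperationIndex) emits one per
// operation, driven per operation by genKernels() above:
//
//   for (auto op_ind : _data.op_order)
//     ret.emplace_back(op_ind, kernel_gen->generate(op_ind));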
void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
const auto NNApiInputs = 2;
if (node.getInputs().size() != NNApiInputs)
assert(_ctx.at(block_size_index).data());
auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
{
fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE, act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
{
fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE, act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
{
fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
- act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+ act_info);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
{
fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
break;
}
default:
const auto ker_width = ker_shape.dim(2);
const auto stride = node.param().stride;
- const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
const auto activation = node.param().activation;
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto act_info = acl_common::asActivationLayerInfo(activation);
auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
- ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
- ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
const auto stride = node.param().stride;
const auto dilation = node.param().dilation;
const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width,
- ker_height, dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
- ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
- conv_info, multiplier, act_info, dilation_info);
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info, dilation_info);
_return_fn = asAclFunction(std::move(fn));
}
}
auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
- std::vector<::arm_compute::ICLTensor *> input_tensors;
+ std::vector<const ::arm_compute::ICLTensor *> input_tensors;
for (auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0),
- output_tensor->handle());
+ ::arm_compute::ICLTensor *input_tensor =
+   _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
+
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor, output_tensor->handle());
}
else
{
const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
- acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
- input_tensors, output_tensor->handle(), fixed_axis);
+ input_tensors, output_tensor->handle(), fixed_axis);
}
_return_fn = asAclFunction(std::move(fn));
const auto activation = node.param().activation;
if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
throw std::runtime_error(
- "KernelGenerator(acl_cl): FullyConnected 16x1Float32 weights is not supported.");
+ "KernelGenerator(acl_cl): FullyConnected 16x1Float32 weights is not supported.");
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
::arm_compute::CLFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
const auto acl_axes =
- acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
+ acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
keep_dims, output_tensor->handle());
}
const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
}
_return_fn = asAclFunction(std::move(fn));
auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- output_tensor->handle(), beta);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
_return_fn = asAclFunction(std::move(fn));
}
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
starts[axis] = begin_value;
}
auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
_return_fn = asAclFunction(std::move(fn));
}
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
starts[axis] = start_value;
const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank,
frontend_layout, backend_layout);
const auto shrink_axis_mask = acl_common::ReorderBits<int32_t>(
- node.param().shrink_axis_mask, input_rank, frontend_layout, backend_layout);
+ node.param().shrink_axis_mask, input_rank, frontend_layout, backend_layout);
::arm_compute::Coordinates starts_set;
::arm_compute::Coordinates ends_set;
}
auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
- begin_mask, end_mask, shrink_axis_mask);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
// Revert disabling applied dim_correction
if (inputData_tensor->dimension(0) == 1)
else
{
auto backend_pv =
- acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
+ acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
ofm_tensor->handle(), backend_pv);
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
- const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
- node.param().op_type, node.param().alpha, node.param().beta);
+ const ::arm_compute::ActivationLayerInfo act_info =
+ acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);
auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
{
fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
- arm_compute::BinaryLogicalOperation::AND);
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
+ arm_compute::BinaryLogicalOperation::AND);
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
{
fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
{
fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
{
fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
default:
case ir::operation::ElementwiseUnary::Type::ABS:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
case ir::operation::ElementwiseUnary::Type::CAST:
{
// TODO Support converting float to int32 as round down
fn = acl_common::generateLayer<arm_compute::CLCast>(
- input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
}
break;
}
case ir::operation::ElementwiseUnary::Type::SQRT:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
default:
auto activation = node.param().activation;
auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
- ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
- epsilon);
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
- input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
_return_fn = asAclFunction(std::move(fn));
}
if (offvalue.isConstant())
{
fn = acl_common::generateLayer<arm_compute::CLOneHot>(
- indices_tensor->handle(), onvalue_tensor->handle(), output_tensor->handle(),
- acl_common::asPixelValue(offvalue), static_cast<uint32_t>(depth), axis);
+ indices_tensor->handle(), onvalue_tensor->handle(), output_tensor->handle(),
+ acl_common::asPixelValue(offvalue), static_cast<uint32_t>(depth), axis);
}
else
{
auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
fn = acl_common::generateLayer<arm_compute::CLOneHot>(
- indices_tensor->handle(), onvalue_tensor->handle(), offvalue_tensor->handle(),
- output_tensor->handle(), static_cast<uint32_t>(depth), axis);
+ indices_tensor->handle(), onvalue_tensor->handle(), offvalue_tensor->handle(),
+ output_tensor->handle(), static_cast<uint32_t>(depth), axis);
}
if (output_tensor->dimension(0) == 1)
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto activation = node.param().activation;
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Permute &node)
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLScale>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
- ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
- ::arm_compute::SamplingPolicy::TOP_LEFT);
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::ScaleKernelInfo{
+ ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
_return_fn = asAclFunction(std::move(fn));
}
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLScale>(
- ifm_tensor->handle(), ofm_tensor->handle(),
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::ScaleKernelInfo{
::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
_return_fn = asAclFunction(std::move(fn));
}
{
const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
- hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
assert(_ctx.at(paddings_index).data());
auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
- values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
// TODO Support optional constant dimension that normalization would be performed on
const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
int32_t radius =
- 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
- float alpha = 1.0f; // In the implementation to make alpha_ become 1
- float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
- float bias = 0.0f; // Don't offset the reduction.
+ 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
+ float alpha = 1.0f; // In the implementation to make alpha_ become 1
+ float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
+ float bias = 0.0f; // Don't offset the reduction.
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
radius, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
- lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
- ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
if (node.param().padding.type == ir::PaddingType::VALID)
{
invalid_horizontal =
- ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+ ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
- ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
- invalid_vertical);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
+ invalid_vertical);
_return_fn = asAclFunction(std::move(fn));
}
auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
{
const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
const auto outputIndices_index{
- node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
+ node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};
auto input_tensor = _tensor_reg->getAclTensor(inputData_index);
auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
- input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
+ input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
}
auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
- ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
if (ifm_tensor->dimension(0) == 1)
}
auto acl_axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
: ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>(
- ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
+ ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{
- node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
+ node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
auto radius = node.param().radius;
auto alpha = node.param().alpha;
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
- ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
+ ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
- input_tensor->handle(), output_tensor->handle(), block_size);
+ input_tensor->handle(), output_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
auto fn =
- acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
+ acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
}
split_dim_revised =
- acl_common::ToARMComputeAxis(ifm_rank, split_dim_revised, frontend_layout, backend_layout)
- .value();
+ acl_common::ToARMComputeAxis(ifm_rank, split_dim_revised, frontend_layout, backend_layout)
+ .value();
fn->configure(ifm_tensor->handle(), size_split_tensor->handle(), split_dim_revised,
output_tensors, node.param().num_splits);
}
auto fn =
- acl_common::generateLayer<arm_compute::CLUnstack>(input_tensor->handle(), outputs, axis);
+ acl_common::generateLayer<arm_compute::CLUnstack>(input_tensor->handle(), outputs, axis);
// Revert disabling applied dim_correction
if (input_tensor->dimension(0) == 1)
auto input_type = _ctx.at(input_index).typeInfo();
auto data_type = acl_common::asDataType(input_type.type());
- auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
+ auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point());
const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
auto input = _tensor_reg->getAclTensor(input_index)->handle();
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
const auto axis =
- acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
}
}
auto fn =
- acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value);
+ acl_common::generateLayer<arm_compute::CLPadLayerEx>(input, output, padding_list, pixel_value);
// NOTE Do not revert disabling applied dim_correction for 4D.
// It would produce a mismatch of results due to an incorrect offset_first_element in
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
_return_fn = asAclFunction(std::move(fn));
}
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
_return_fn = asAclFunction(std::move(fn));
}
}
auto fn = acl_common::generateLayer<arm_compute::CLReverse>(
- ifm_tensor->handle(), ofm_tensor->handle(), axis_tensor->handle());
+ ifm_tensor->handle(), ofm_tensor->handle(), axis_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
#ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
-#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <backend/basic/KernelGeneratorBase.h>
-#include "ir/Operands.h"
#include "TensorBuilder.h"
#include "AclTensorRegistry.h"
#include "TensorManager.h"
namespace acl_cl
{
-class KernelGenerator : public cpu_common::KernelGeneratorBase
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+private:
void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ const ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_layout;
};
} // namespace acl_cl
{
Optimizer::Optimizer(BackendContext *context)
- : _context{context},
- _tensor_builder{std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
+ : _context{context}, _tensor_builder{
+ std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
{
assert(context);
}
{
acl_common::AclSubTensorAnalyzer sa{*_context->graph()};
sa.setUsePadding();
- for (auto op_info : _context->operation_list())
- {
- auto &op = _context->graph()->operations().at(op_info.index);
- sa.setLayout(op_info.layout);
- op.accept(sa);
- }
+ _context->graph()->operations().iterate(
+ [&](const ir::OperationIndex &, const ir::Operation &op) {
+ sa.setLayout(_context->graph()->layout());
+ op.accept(sa);
+ });
_tensor_builder->parent_map(sa.releaseParentMap());
}
{
using TensorBuilder =
- acl_common::AclTensorBuilder<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+ acl_common::AclTensorBuilder<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
} // namespace acl_cl
} // namespace backend
{
using MemoryManager =
- acl_common::AclMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+ acl_common::AclMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
-using LinearMemoryManager = acl_common::AclLinearMemoryManager<
- operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator,
- ::arm_compute::MemoryGroup>;
+using LinearMemoryManager =
+ acl_common::AclLinearMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
+ ::arm_compute::MemoryManagerOnDemand,
+ ::arm_compute::PoolManager, ::arm_compute::BlobLifetimeManager,
+ ::arm_compute::CLBufferAllocator, ::arm_compute::MemoryGroup>;
using InternalBufferManager = acl_common::AclInternalBufferManager<
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator>;
+ ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
+ ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator>;
using TensorManager =
- acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+ acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
inline TensorManager *createTensorManager(bool is_linear_executor)
{
CLSubTensor::CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape,
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent)
- : _cl_sub_tensor(std::make_shared<arm_compute::CLSubTensor>(parent->handle(), tensor_shape,
- coords, extend_parent)),
- _rank{rank}
+ : ICLTensor{rank}, _cl_sub_tensor(std::make_shared<arm_compute::CLSubTensor>(
+ parent->handle(), tensor_shape, coords, extend_parent))
{
// DO NOTHING
}
CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape,
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false);
-public:
- size_t num_dimensions() const final { return _rank; }
-
public:
const arm_compute::CLSubTensor *handle() const override;
arm_compute::CLSubTensor *handle() override;
private:
std::shared_ptr<arm_compute::CLSubTensor> _cl_sub_tensor;
- size_t _rank;
};
} // namespace operand
{
CLTensor::CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
- : _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _rank{rank}, _num_uses{num_uses}
+ : ICLTensor{rank}, _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _num_uses{num_uses}
{
allocator()->init(info);
}
public:
CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
-public:
- size_t num_dimensions() const final { return _rank; }
-
public:
const arm_compute::CLTensor *handle() const override;
arm_compute::CLTensor *handle() override;
private:
std::shared_ptr<arm_compute::CLTensor> _cl_tensor;
- size_t _rank;
size_t _num_uses;
};
class ICLTensor : public acl_common::IACLTensor
{
public:
+ ICLTensor(size_t rank) : IACLTensor{rank} {}
const arm_compute::ICLTensor *handle() const override = 0;
arm_compute::ICLTensor *handle() override = 0;
AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU(T_Tensor *ifm_alloc)
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
auto fn = std::make_unique<T_ActivationLayer>();
template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
std::unique_ptr<exec::IFunction>
AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU1(
- T_Tensor *ifm_alloc)
+ T_Tensor *ifm_alloc)
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
auto fn = std::make_unique<T_ActivationLayer>();
template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
std::unique_ptr<exec::IFunction>
AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU6(
- T_Tensor *ifm_alloc)
+ T_Tensor *ifm_alloc)
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
auto fn = std::make_unique<T_ActivationLayer>();
AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : cpu_common::ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
+ : _operands{operands}, _tensor_reg{tensor_reg}, _current_layout{ir::Layout::UNKNOWN}
{
// DO NOTHING
}
permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
}
+// NOTE Workaround for the 16-bit float type. This is enough here since only the byte size matters.
+using float16 = uint16_t;
+
+void AclConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ // For only CONSTANTS
+ // TODO Add to check if tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ const auto type = obj.typeInfo().type();
+ using ir::DataType;
+
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ _init_map[index] = copyInit<float>;
+ break;
+ case DataType::INT32:
+ _init_map[index] = copyInit<int32_t>;
+ break;
+ case DataType::UINT32:
+ _init_map[index] = copyInit<uint32_t>;
+ break;
+ case DataType::BOOL8:
+ case DataType::QUANT_UINT8_ASYMM:
+ _init_map[index] = copyInit<uint8_t>;
+ break;
+ case DataType::QUANT_INT8_SYMM:
+ case DataType::QUANT_INT8_ASYMM:
+ _init_map[index] = copyInit<int8_t>;
+ break;
+ case DataType::FLOAT16:
+ _init_map[index] = copyInit<float16>;
+ break;
+ case DataType::INT64:
+ _init_map[index] = copyInit<int64_t>;
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+void AclConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ // For only CONSTANTS
+ // TODO Add to check if tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ const auto type = obj.typeInfo().type();
+ using ir::DataType;
+ using namespace std::placeholders;
+
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout);
+ break;
+ case DataType::INT32:
+ _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout);
+ break;
+ case DataType::UINT32:
+ _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_layout);
+ break;
+ case DataType::BOOL8:
+ case DataType::QUANT_UINT8_ASYMM:
+ _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_layout);
+ break;
+ case DataType::QUANT_INT8_SYMM:
+ case DataType::QUANT_INT8_ASYMM:
+ _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_layout);
+ break;
+ case DataType::FLOAT16:
+ _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_layout);
+ break;
+ case DataType::INT64:
+ _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_layout);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
} // namespace acl_common
} // namespace backend
} // namespace onert
#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
-#include <backend/cpu_common/ConstantInitializerBase.h>
-#include <ir/Operands.h>
#include "AclTensorRegistry.h"
+#include <unordered_map>
+#include <functional>
+
+#include <ir/Coordinates.h>
+#include <ir/Layout.h>
+#include <ir/Operand.h>
+#include <ir/Operands.h>
+#include <ir/OperationVisitor.h>
+#include <backend/ITensorRegistry.h>
+#include <util/logging.h>
+
namespace onert
{
namespace backend
namespace acl_common
{
-class AclConstantInitializer : public cpu_common::ConstantInitializerBase
+template <typename T>
+static void Init(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj, const bool copy,
+ const onert::ir::Layout frontend_layout = onert::ir::Layout::UNKNOWN)
+{
+ const auto shape = model_obj.shape();
+ assert(model_obj.data());
+ auto base = reinterpret_cast<const T *>(model_obj.data()->base());
+
+ obj.access([&](::onert::backend::ITensor &tensor) {
+ switch (shape.rank())
+ {
+ case 0:
+ {
+ assert(model_obj.data()->size() == sizeof(T));
+ const auto value = *reinterpret_cast<const T *>(base);
+ T *into = reinterpret_cast<T *>(tensor.buffer());
+ *into = value;
+ break;
+ }
+ case 1:
+ {
+ auto vec_size = shape.dim(0);
+ for (int32_t n = 0; n < vec_size; ++n)
+ {
+ const T *from = reinterpret_cast<const T *>(base) + n;
+ const auto value = *from;
+
+ T *into = reinterpret_cast<T *>(tensor.buffer()) + n;
+
+ *into = value;
+ }
+ break;
+ }
+ case 2:
+ {
+ const int32_t copy_len = shape.dim(1);
+
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ ::onert::ir::Coordinates coords{i, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords), base + i * copy_len,
+ copy_len * sizeof(T));
+ }
+ break;
+ }
+ case 3:
+ {
+ const int32_t width = shape.dim(1);
+ const int32_t copy_len = shape.dim(2);
+
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < shape.dim(1); ++j)
+ {
+ ::onert::ir::Coordinates coords{i, j, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords),
+ base + i * width * copy_len + j * copy_len, copy_len * sizeof(T));
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ const int32_t height = shape.dim(1);
+ const int32_t width = shape.dim(2);
+ const int32_t copy_len = shape.dim(3);
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < shape.dim(1); ++j)
+ {
+ for (auto k = 0; k < shape.dim(2); ++k)
+ {
+ if (copy)
+ {
+ ::onert::ir::Coordinates coords{i, j, k, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords),
+ base + i * height * width * copy_len + j * width * copy_len + k * copy_len,
+ copy_len * sizeof(T));
+ }
+ else
+ {
+ for (auto l = 0; l < shape.dim(3); ++l)
+ {
+ const auto coords =
+ ::onert::ir::convertCoordinates({i, j, k, l}, frontend_layout, tensor.layout());
+ T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
+ T value = *(base + i * height * width * copy_len + j * width * copy_len +
+ k * copy_len + l);
+ *into = value;
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error{"Not yet supported"};
+ }
+ });
+}
+
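+// copyInit always performs a plain copy; permuteInit copies only when the frontend layout
+// already matches the tensor layout, otherwise Init<T> falls back to the element-wise
+// permutation path (rank-4 case above).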
+template <typename T>
+void copyInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
+{
+ Init<T>(model_obj, obj, true);
+}
+
+template <typename T>
+void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj,
+ const onert::ir::Layout frontend_layout)
{
+ const bool copy = frontend_layout == obj.layout();
+ Init<T>(model_obj, obj, copy, frontend_layout);
+}
+
+class AclConstantInitializer : public ir::OperationVisitor
+{
+public:
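+ // Runs every registered initializer to fill constant operand data into its backend tensor,
+ // then clears the map so the data is written only once.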
+ void run()
+ {
+ assert(_tensor_reg);
+ for (const auto &it : _init_map)
+ {
+ const auto &ind = it.first;
+ const auto &fn = it.second;
+
+ const auto &model_obj = _operands.at(ind);
+ auto tensor_obj = _tensor_reg->getNativeITensor(ind);
+ assert(tensor_obj != nullptr);
+ fn(model_obj, *tensor_obj);
+ VERBOSE(FillOperandData) << "Fill data for operand " << ind << std::endl;
+ }
+ _init_map.clear();
+ }
+
public:
AclConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg);
+public:
+ using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>;
+
+public:
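+ // The default initializer permutes data whenever the backend layout differs from the
+ // frontend layout (see registerPermuteInitializer).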
+ void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
+ {
+ registerPermuteInitializer(index, obj);
+ }
+ void registerCopyInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
+ void registerPermuteInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
+
+public:
+ void setLayout(ir::Layout layout) { _current_layout = layout; }
+ bool exist(const ir::OperandIndex &ind) { return _init_map.find(ind) != _init_map.end(); }
+
public:
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::Conv2D &) override;
void copyInputInitialize(const ir::Operation &node, uint32_t index);
void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; }
-
protected:
+ const ir::Operands &_operands;
std::shared_ptr<ITensorRegistry> _tensor_reg;
+ std::unordered_map<ir::OperandIndex, Initializer> _init_map;
+ ir::Layout _current_layout;
};
} // namespace acl_common
#include <arm_compute/runtime/IMemoryManager.h>
#include <cassert>
#include <memory>
-#include <backend/IMemoryManager.h>
namespace onert
{
/**
* @brief Interface for InternalBufferManager which has ::arm_compute::IMemoryManager pointer
*/
-struct IInternalBufferManager : public backend::IMemoryManager
+struct IInternalBufferManager
{
virtual ~IInternalBufferManager() = default;
+ virtual void allocate(void) = 0;
+ virtual void deallocate(void) = 0;
+
/**
* @brief Get shared_ptr of ::arm_compute::IMemoryManager
*/
void enableDimCorrection(IACLTensor *tensor)
{
- size_t input_rank = tensor->num_dimensions();
+ size_t input_rank = tensor->getShape().rank();
const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
- .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true);
+ .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true);
}
void disableDimCorrection(IACLTensor *tensor)
{
- size_t input_rank = tensor->num_dimensions();
+ size_t input_rank = tensor->getShape().rank();
const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
- .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false);
+ .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false);
}
template <typename Layer, typename... Args>
// TODO Support dynamic rnn
// TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
const auto cell_threshold = node.param().cell_threshold;
const auto projection_threshold = node.param().projection_threshold;
bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
operands.at(input_to_input_weights_index).shape().dim(1) != 0;
bool has_recurrent_to_input_weights =
- operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index);
auto input_to_output_weights_tensor = tensor_reg->getAclTensor(input_to_output_weights_index);
auto recurrent_to_forget_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
+ tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
auto recurrent_to_cell_weights_tensor = tensor_reg->getAclTensor(recurrent_to_cell_weights_index);
auto recurrent_to_output_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_output_weights_index);
+ tensor_reg->getAclTensor(recurrent_to_output_weights_index);
auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index);
auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index);
if (has_cifg_param)
{
auto input_to_input_weights_tensor =
- tensor_reg->getAclTensor(input_to_input_weights_index); // optional
+ tensor_reg->getAclTensor(input_to_input_weights_index); // optional
auto recurrent_to_input_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
+ tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
auto cell_to_input_weights_handle =
- has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
- : nullptr; // optional (non-cifg && peephole)
+ has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
+ : nullptr; // optional (non-cifg && peephole)
auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index); // optional
lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
recurrent_to_input_weights_tensor->handle(),
if (has_peephole_param)
{
auto cell_to_forget_weights_tensor =
- tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
+ tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
auto cell_to_output_weights_tensor =
- tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
+ tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
cell_to_output_weights_tensor->handle());
}
{
auto projection_weights_tensor = tensor_reg->getAclTensor(projection_weights_index); // optional
auto projection_bias_handle = has_projection_bias
- ? tensor_reg->getAclTensor(projection_bias_index)->handle()
- : nullptr; // optional
+ ? tensor_reg->getAclTensor(projection_bias_index)->handle()
+ : nullptr; // optional
lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
}
auto fn = generateLayer<T_ACLLayer>(
- input_tensor->handle(), input_to_forget_weights_tensor->handle(),
- input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
- recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
- recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
- cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
- output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
- scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
- cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip,
- projection_clip);
+ input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+ input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+ recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
+ recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+ cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), output_state_in_tensor->handle(),
+ cell_state_in_tensor->handle(), scratch_buffer_tensor->handle(),
+ output_state_out_tensor->handle(), cell_state_out_tensor->handle(), output_tensor->handle(),
+ lstm_params, act_info, cell_clip, projection_clip);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
const auto input_rank = operands.at(input_index).shape().rank();
const auto output_size =
- operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
UNUSED_RELEASE(output_size);
assert(bias_index.undefined() || operands.at(bias_index).shape().dim(0) == output_size);
assert(operands.at(weight_index).shape().dim(0) == output_size);
const auto batch_size =
- operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
const auto input_size =
- operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
+ operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
// Check for reshaping input's shape into rank-2
bool needs_reshape = false;
}
auto fn = generateLayer<T_ACLLayer>(
- tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
- output_tensor->handle(), needs_reshape,
- asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
+ tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
+ output_tensor->handle(), needs_reshape,
+ asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
const auto kw = node.param().kw;
const auto stride = node.param().stride;
const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
auto ifm_tensor = tensor_reg->getAclTensor(ifm_index);
::arm_compute::PoolingLayerInfo info{
- pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
- asPadStrideInfo(padding, stride), true /* exclude_padding */};
+ pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
+ asPadStrideInfo(padding, stride), true /* exclude_padding */};
auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
#include "ir/OperandIndexMap.h"
#include "util/logging.h"
-namespace
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
{
template <typename T_MemoryManager, typename T_PoolManager, typename T_LifetimeManager>
std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
std::shared_ptr<T_MemoryManager> mem_mgr =
- std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
+ std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
return mem_mgr;
}
-} // namespace
-
-namespace onert
-{
-namespace backend
-{
-namespace acl_common
-{
-
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor, typename T_MemoryManager,
typename T_PoolManager, typename T_LifetimeManager, typename T_Allocator,
typename T_MemoryGroup>
{
public:
AclLinearMemoryManager()
- : _allocator{nullptr},
- _io_manager{createMemoryManager<T_MemoryManager, T_PoolManager, T_LifetimeManager>()},
- _io_group{std::make_shared<T_MemoryGroup>(_io_manager)}
+ : _allocator{nullptr},
+ _io_manager{createMemoryManager<T_MemoryManager, T_PoolManager, T_LifetimeManager>()},
+ _io_group{std::make_shared<T_MemoryGroup>(_io_manager)}
{
// DO NOTHING
}
#include <arm_compute/runtime/IMemoryManager.h>
#include <cassert>
-#include "backend/IMemoryManager.h"
#include "ir/OperandIndexMap.h"
#include "Convert.h"
#include "util/logging.h"
namespace acl_common
{
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclMemoryManager : public backend::IMemoryManager
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclMemoryManager
{
public:
AclMemoryManager()
virtual ~AclMemoryManager() = default;
- void allocate(void) override
+ virtual void allocate(void)
{
for (const auto &tensor_entry : _tensors)
{
}
}
- void deallocate(void) override
+ virtual void deallocate(void)
{
for (const auto &tensor_entry : _tensors)
{
}
}
- virtual void startLifetime(const ir::OperandIndex &) { /* DO NOTHING */}
- virtual void finishLifetime(const ir::OperandIndex &) { /* DO NOTHING */}
+ virtual void startLifetime(const ir::OperandIndex &)
+ { /* DO NOTHING */
+ }
+ virtual void finishLifetime(const ir::OperandIndex &)
+ { /* DO NOTHING */
+ }
void buildTensor(const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank,
size_t num_uses)
bool extent_parent)
{
auto subtensor =
- std::make_shared<T_SubTensor>(parent_tensor.get(), shape, coordinates, rank, extent_parent);
+ std::make_shared<T_SubTensor>(parent_tensor.get(), shape, coordinates, rank, extent_parent);
_subtensors[child_ind] = subtensor;
}
coordinate_info.set(axis, axis_point);
_parent_map.emplace(
- input_index, acl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
+ input_index, acl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
axis_point += input_shape.dim(axis);
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
T_AclTensorManager *tensor_mgr)
- : _operands{operands}, _tensor_mgr{tensor_mgr}
+ : _operands{operands}, _tensor_mgr{tensor_mgr}
{
assert(_tensor_mgr);
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
- const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout)
+ const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout)
{
assert(_tensor_mgr->constTensors().size() == 0);
assert(_tensor_mgr->nonconstTensors().size() == 0);
offset = {offset[0], offset[3], offset[1], offset[2]};
}
else if (_operands.at(parent_index).shape().rank() >= 4 &&
- frontend_layout == ir::Layout::NHWC && backend_layout == ir::Layout::NCHW)
+ frontend_layout == ir::Layout::NCHW && backend_layout == ir::Layout::NHWC)
{
// Permutation changing layout beyond 4-D is not supported yet
const auto parent_rank = _operands.at(parent_index).shape().rank();
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isRegistered(
- const ir::OperandIndex &ind) const
+ const ir::OperandIndex &ind) const
{
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
- [&](ir::OperandIndex ind) -> ir::OperandIndex & {
+ [&](ir::OperandIndex ind) -> ir::OperandIndex & {
ir::OperandIndex &ret = root_map[ind];
// We know the root parent value already
const auto &info = entry.second;
const auto &backend_layout = _tensor_layout_map[ind];
auto tensor_info =
- asTensorInfo(info.shape(), info.typeInfo(), ir::Layout::UNKNOWN, backend_layout, true);
+ asTensorInfo(info.shape(), info.typeInfo(), ir::Layout::UNKNOWN, backend_layout, true);
_tensor_mgr->buildTensor(ind, tensor_info, info.shape().rank(), info.isConstant(),
_uses_count_map[ind]);
}
// Subtensors
assert(_tensor_mgr->nonconstSubtensors().size() == 0);
// TODO Iterate `_parent_map` instead, once the optimizer bug is fixed
- // `Optimizer` iterates the entire OpSequences, so there is a bug if iterating _parent_map
+ // `Optimizer` iterates over all Operations, so iterating `_parent_map` here would be buggy
for (auto &entry : _tensor_info_map)
{
auto ind = entry.first;
assert(parent_tensor != nullptr);
// Child's type should be same with parent
- assert(tensor_info.typeInfo().offset() ==
+ assert(tensor_info.typeInfo().zero_point() ==
parent_tensor->info()->quantization_info().uniform().offset);
assert(tensor_info.typeInfo().scale() ==
parent_tensor->info()->quantization_info().uniform().scale);
auto shape = asTensorShape(tensor_info.shape(), ir::Layout::UNKNOWN, backend_layout, true);
::arm_compute::Coordinates coordinates =
- asTensorCoordinate(parent_info.coordinates, ir::Layout::UNKNOWN, backend_layout);
+ asTensorCoordinate(parent_info.coordinates, ir::Layout::UNKNOWN, backend_layout);
_tensor_mgr->buildSubtensor(parent, current, shape, coordinates, tensor_info.shape().rank(),
true);
stack.pop();
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::areSubTensorsOf(
- const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
+ const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
{
for (auto &cand : seq)
{
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isSubTensorOf(
- const ir::OperandIndex &parent, const ir::OperandIndex &child)
+ const ir::OperandIndex &parent, const ir::OperandIndex &child)
{
auto itr = _parent_map.find(child);
if (itr == _parent_map.end())
#include <arm_compute/runtime/IMemoryManager.h>
-#include "backend/ITensorManager.h"
#include "AclMemoryManager.h"
#include "AclInternalBufferManager.h"
#include "ir/OperandIndexMap.h"
namespace acl_common
{
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclTensorManager : public backend::ITensorManager
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorManager
{
public:
using T_AclMemoryManager = AclMemoryManager<T_ITensor, T_Tensor, T_SubTensor>;
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::AclTensorManager(
- T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
- IInternalBufferManager *inter_mgr)
- : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}, _inter_mgr{inter_mgr}
+ T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
+ IInternalBufferManager *inter_mgr)
+ : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}, _inter_mgr{inter_mgr}
{
// DO NOTHING
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildTensor(
- const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank, bool as_const,
- size_t num_uses)
+ const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank, bool as_const,
+ size_t num_uses)
{
assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
if (as_const)
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildSubtensor(
- const ir::OperandIndex &parent, const ir::OperandIndex &child,
- const ::arm_compute::TensorShape &shape, const ::arm_compute::Coordinates &coordinates,
- size_t rank, bool extent_parent)
+ const ir::OperandIndex &parent, const ir::OperandIndex &child,
+ const ::arm_compute::TensorShape &shape, const ::arm_compute::Coordinates &coordinates,
+ size_t rank, bool extent_parent)
{
assert(_ind_to_mgr.find(child) == _ind_to_mgr.end());
std::shared_ptr<T_ITensor> parent_tensor = findTensorAsParent(parent);
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::iterate(
- const std::function<void(const ir::OperandIndex &)> &fn)
+ const std::function<void(const ir::OperandIndex &)> &fn)
{
for (auto it : _nonconst_mgr->tensors())
fn(it.first);
// used in several nodes.
if (tensor->handle() && !tensor->handle()->is_used() && tensor->num_uses() < 2)
{
- VERBOSE(AclTensorManager) << "Tensor #" << ind.value()
+ VERBOSE(AclTensorManager) << "Tensor " << ind
<< " will be deallocated as an unused constant tensor" << std::endl;
tensor->allocator()->free();
tensor.reset();
bool apply_dim_correction)
{
::arm_compute::TensorInfo info(
- asTensorShape(shape, frontend_layout, backend_layout, apply_dim_correction), 1,
- asDataType(typeInfo.type()), asQuantizationInfo(typeInfo.scale(), typeInfo.offset()));
+ asTensorShape(shape, frontend_layout, backend_layout, apply_dim_correction), 1,
+ asDataType(typeInfo.type()), asQuantizationInfo(typeInfo.scale(), typeInfo.zero_point()));
info.set_data_layout(asDataLayout(backend_layout));
return info;
}
return ::arm_compute::ActivationLayerInfo{};
case ir::Activation::RELU:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
case ir::Activation::RELU1:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
case ir::Activation::RELU6:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
// Cases for activation of LSTM.
case ir::Activation::TANH:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
case ir::Activation::SIGMOID:
// NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
// TODO In the ACL and nnapi specs, Logistic currently always uses L=1, k=1, x0=0 (always
// sigmoid) regardless of the parameter values.
// If ACL ever supports a non-sigmoid logistic, the param values should be fixed here.
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
default:
throw std::runtime_error{"Not supported internal activation, yet"};
break;
if (alpha == ir::operation::ElementwiseActivation::infinity)
{
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
}
else
{
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
}
}
else
{
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
}
case ir::operation::ElementwiseActivation::Type::TANH:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
case ir::operation::ElementwiseActivation::Type::LOGISTIC:
// NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
// TODO In the ACL and nnapi specs, Logistic currently always uses L=1, k=1, x0=0 (always
// sigmoid) regardless of the parameter values.
// If ACL ever supports a non-sigmoid logistic, the param values should be fixed here.
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
default:
throw std::runtime_error{"Not supported internal elementwise activation, yet"};
break;
namespace acl_common
{
-size_t IACLTensor::dimension(size_t index) const
-{
- // Assume that the front is higher dimensional.
- // i.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout
- // NOTE This tensor must not be applied dim correction
- auto rank = num_dimensions();
- rank = rank == 0 ? 1 : rank;
- assert(rank > index);
- const ARMComputeAxis reversed{(static_cast<uint32_t>(rank - index) - 1)};
- return info()->dimension(reversed.value());
-}
-
size_t IACLTensor::calcOffset(const ir::Coordinates &coords) const
{
- auto rank = num_dimensions();
+ auto rank = _rank;
rank = rank == 0 ? 1 : rank;
- assert(rank == coords.size());
+ assert(static_cast<size_t>(rank) == coords.size());
::arm_compute::Coordinates acl_coords;
- for (uint32_t i = 0; i < rank; ++i)
+ for (size_t i = 0; i < rank; ++i)
{
const ARMComputeAxis reversed{static_cast<uint32_t>((rank - i) - 1)};
acl_coords.set(reversed.value(), coords[i]);
return info()->quantization_info().uniform().scale;
}
-int32_t IACLTensor::data_offset() const
+int32_t IACLTensor::data_zero_point() const
{
// FIXME What if quantization info is non-uniform?
return info()->quantization_info().uniform().offset;
}
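+// Per-channel quantization parameters are not provided here; IACLTensor only exposes the
+// uniform (per-tensor) scale and zero point above.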
+const std::vector<float> &IACLTensor::data_scales() const
+{
+ throw std::runtime_error("IACLTensor::data_scales() is not supported.");
+}
+
+const std::vector<int32_t> &IACLTensor::data_zero_points() const
+{
+ throw std::runtime_error("IACLTensor::data_zero_points() is not supported.");
+}
+
} // namespace acl_common
} // namespace backend
} // namespace onert
#include <backend/ITensor.h>
#include <arm_compute/core/ITensor.h>
+#include "Swizzle.h"
namespace onert
{
IACLTensor(IACLTensor &&) = default;
IACLTensor &operator=(IACLTensor &&) = default;
+ IACLTensor(size_t rank) : _rank{rank} {}
+
public:
uint8_t *buffer() const final { return handle()->buffer(); }
size_t total_size() const final { return info()->total_size(); }
- size_t dimension(size_t index) const final;
size_t calcOffset(const ir::Coordinates &coords) const final;
ir::Layout layout() const final;
ir::DataType data_type() const final;
float data_scale() const override;
- int32_t data_offset() const override;
+ int32_t data_zero_point() const override;
+ const std::vector<float> &data_scales() const override;
+ const std::vector<int32_t> &data_zero_points() const override;
bool has_padding() const override { return info()->has_padding(); }
bool is_dynamic() const override { return false; }
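+ // Rebuilds the onert-ordered shape from ACL's reversed axis order via dimension() below.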
+ ir::Shape getShape() const override
+ {
+ onert::ir::Shape shape(num_dimensions());
+ for (uint32_t d = 0; d < num_dimensions(); d++)
+ shape.dim(d) = dimension(d);
+ return shape;
+ }
public:
virtual const arm_compute::ITensor *handle() const = 0;
const arm_compute::ITensorInfo *info() const { return handle()->info(); }
arm_compute::ITensorInfo *info() { return handle()->info(); }
+
+ size_t dimension(size_t index) const
+ {
+ // Assume that the front axes are the higher dimensions.
+ // e.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout
+ // NOTE Dim correction must not have been applied to this tensor
+ auto rank = _rank;
+ rank = rank == 0 ? 1 : rank;
+ assert(rank > index);
+ const ARMComputeAxis reversed{(static_cast<uint32_t>(rank - index) - 1)};
+ return info()->dimension(reversed.value());
+ }
+ size_t num_dimensions() const { return _rank; }
+
+protected:
+ size_t _rank; // Actual rank (reflects extended rank)
};
} // namespace acl_common
}
::arm_compute::PermutationVector ACL_PV =
- ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
+ ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
ACL_PV.set_num_dimensions(rank);
return ACL_PV;
for (int32_t i = numOfBits - 1; i >= 0; --i)
{
const uint32_t toShift =
- numOfBits - ToARMComputeAxis(numOfBits, i, org_layout, acl_layout).value() - 1;
+ numOfBits - ToARMComputeAxis(numOfBits, i, org_layout, acl_layout).value() - 1;
out += ((in & 1) << toShift);
in >>= 1;
}
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<acl_neon::BackendContext>(this, &graph);
- auto tm = createTensorManager(is_linear_executor);
+ const auto &graph = *data.graph;
+ const auto &operands = data.graph->operands();
+ // Read this flag before std::move(data); using `data` after the move is a use-after-move.
+ const auto is_linear_executor = data.is_linear_executor;
+ auto context = std::make_unique<acl_neon::BackendContext>(this, std::move(data));
+ auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr);
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
void BackendContext::initConsts()
{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
+ _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
+ constant_initializer->setLayout(graph()->layout());
+ op.accept(*constant_initializer);
+ });
- for (auto ind : operand_list())
- {
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (_data.external_operands.contains(ind) || !operand.isConstant())
+ return;
const auto &obj = graph()->operands().at(ind);
if (obj.isConstant() && !constant_initializer->exist(ind))
{
constant_initializer->registerDefaultInitializer(ind, obj);
}
- }
+ });
constant_initializer->run();
}
-void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+void BackendContext::planTensors()
{
ir::OperandIndexMap<uint32_t> uses_map;
ir::OperandIndexMap<uint32_t> def_map;
ir::OperandIndexSequence constants;
// Prepare scanning
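+ // uses_map / def_map track the remaining use/def counts per operand; the scan below calls
+ // notifyFirstUse() when an operand is defined and notifyLastUse() once its use count drops
+ // to zero.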
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- const auto &li = lower_info.operand.at(ind);
- if (li->def_factors().getOnlyElement().backend() != backend())
- continue;
-
- // Ignore unused tensor
- if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
- {
- VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (_data.external_operands.contains(ind))
return;
- }
uses_map[ind] = obj.getUses().size();
def_map[ind] = obj.getDef().valid() ? 1 : 0;
if (obj.isConstant())
constants.append(ind);
- auto factor = li->def_factors().getOnlyElement();
if (!tensor_builder->isRegistered(ind))
{
- // These tensors do not exist in any op_seq (No use and def)
+ // These tensors do not exist in any operation (No use and def)
const auto info = obj.info();
- const auto backend_layout = factor.layout();
+ const auto layout = _data.operand_layouts.at(ind);
// TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ tensor_builder->registerTensorInfo(ind, info, layout);
}
- }
+ });
// Start scanning to do notify{First|Last}Use for each tensor
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_seq_ind : order)
+ for (const auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- for (const auto &op_idx : op_seq.operations())
- {
- auto &op = graph()->operations().at(op_idx);
- auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
- auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_inputs =
+ graph()->operations().at(op_ind).getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = graph()->operations().at(op_ind).getOutputs() | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
- // Define outputs
- for (const auto &ind : op_outputs)
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder->notifyFirstUse(ind);
- }
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
}
+ }
- // Scan variable tensors
- // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
- // non-constant because of less memory usage by memory planning in here
- for (const auto &ind : op_inputs)
+ // Scan variable tensors
+ // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
+ // non-constant because of less memory usage by memory planning in here
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- const auto &operand = graph()->operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
- lower_info.operand.at(ind)->use_factors().size() == 1);
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder->notifyFirstUse(ind);
- }
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
}
+ }
- for (const auto &ind : op_inputs)
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
{
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder->notifyLastUse(ind);
- }
+ // plan for deallocation of static tensornode
+ tensor_builder->notifyLastUse(ind);
}
}
}
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (uses_map[ind] == 0)
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ });
+
// Dispose and validate
for (const auto &ind : constants)
{
}
assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
+ITensorRegistry *BackendContext::genTensors()
{
optimizer->optimize();
- for (const auto op_seq_ind : order)
- {
- const auto &op_seq = op_seqs.at(op_seq_ind);
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (const auto op_ind : op_seq)
- {
- bool op_assigned = [&]() {
- for (auto &op_info : operation_list())
- if (op_info.index == op_ind)
- return true;
- return false;
- }();
- if (!op_assigned)
- continue;
+ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (external_operands().contains(ind))
+ return;
- const auto &op = graph()->operations().at(op_ind);
- for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
- {
- if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
- find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
- {
- const auto &operand_lower_info =
- lower_info.operand.at(index)->def_factors().getOnlyElement();
-
- // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
- // op.getOutputs() of permute (CPU) returns tensor A
- // but tensor A belongs to the backend of acl_cl.
- // So, we have to make this tensor NOT registered for CPU.
- if (operand_lower_info.backend() != backend())
- continue;
-
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = op_seq.getLayout();
- const auto backend_layout = operand_lower_info.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
- }
- }
- }
+ const auto frontend_layout = graph()->layout();
+ const auto backend_layout = operand_layouts().at(ind);
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
+ });
// TODO Get compiler options from compiler, and use it rather than getting it from Env
if (util::getConfigString(util::config::EXECUTOR) == "Linear")
{
- planTensors(order, op_seqs, lower_info);
+ planTensors();
}
else
{
// For the executors that does not have fixed linear execution order:
// To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto ind : operand_list())
- {
+ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
if (tensor_builder->isRegistered(ind))
tensor_builder->notifyFirstUse(ind);
- }
+ });
}
tensor_builder->prepare();
return tensor_registry.get();
}
-FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
+FunctionMap BackendContext::genKernels()
{
FunctionMap ret;
- for (auto op_seq_ind : order)
+ for (auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
}
tensor_builder->allocate();
initConsts();
// NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
for (auto &it : ret)
{
return ret;
}
-} // namespace neon
+} // namespace acl_neon
} // namespace backend
} // namespace onert
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
+ kernel_gen}
{
}
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
- FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
private:
void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+ void planTensors();
public:
// TODO Make it private
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : acl_common::AclConstantInitializer{operands, tensor_reg}
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
{
const int32_t value = base[i * 2 + j];
int32_t *into = reinterpret_cast<int32_t *>(
- // The coordinates of NETensor are different from the coordiantes of CLTensor in
- // this operand.
- // NEON : {j, reversed i}
- // CL : {reversed i, j}
- tensor.buffer() + tensor.calcOffset({j, shape.dim(0) - i - 1}));
+ // The coordinates of NETensor are different from the coordinates of CLTensor in
+ // this operand.
+ // NEON : {j, reversed i}
+ // CL : {reversed i, j}
+ tensor.buffer() + tensor.calcOffset({j, shape.dim(0) - i - 1}));
*into = value;
}
}
using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
- ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
+ ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
- : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
+ : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
+ _operations_ctx(graph.operations()), _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg(tensor_reg)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- // TODO Move this to IKernelGenerator
- // (all derivatives have the same implementation for this)
- assert(!_return_fn_seq);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
- _return_fn_seq->enableDynamicShapeInferer(false);
-
- _current_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
- {
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
- }
+ auto ret = std::make_unique<exec::FunctionSequence>();
+ ret->enableDynamicShapeInferer(false);
+
+ const auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ ret->append(releaseFunction());
+ return ret;
}
void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
}
assert(axis_value >= 0 && axis_value < ifm_rank);
const auto fixed_axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
: ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
- ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type);
+ ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
const auto NNApiInputs = 2;
if (node.getInputs().size() != NNApiInputs)
assert(_ctx.at(block_size_index).data());
auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
{
fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
{
fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
{
// RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO
fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
{
fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
break;
}
default:
break;
}
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto stride = node.param().stride;
- const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
const auto activation = node.param().activation;
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto act_info = acl_common::asActivationLayerInfo(activation);
auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
- ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
- ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
- input_tensor->handle(), output_tensor->handle(), block_size);
+ input_tensor->handle(), output_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
const auto stride = node.param().stride;
const auto dilation = node.param().dilation;
const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width,
- ker_height, dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
- ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
- conv_info, multiplier, act_info, dilation_info);
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info, dilation_info);
_return_fn = asAclFunction(std::move(fn));
}
}
auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
- std::vector<::arm_compute::ITensor *> input_tensors;
+ std::vector<const ::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0),
- output_tensor->handle());
+ ::arm_compute::ITensor *input_tensor = _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
+ fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor, output_tensor->handle());
}
else
{
const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
- acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
- input_tensors, output_tensor->handle(), fixed_axis);
+ input_tensors, output_tensor->handle(), fixed_axis);
}
_return_fn = asAclFunction(std::move(fn));
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
- const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
- node.param().op_type, node.param().alpha, node.param().beta);
+ const ::arm_compute::ActivationLayerInfo act_info =
+ acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);
std::unique_ptr<arm_compute::IFunction> fn =
- acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
- ofm_tensor->handle(), act_info);
+ acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
+ ofm_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
{
fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
{
fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
{
fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
{
fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
default:
case ir::operation::ElementwiseUnary::Type::ABS:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
case ir::operation::ElementwiseUnary::Type::CAST:
else
{
fn = acl_common::generateLayer<arm_compute::NECast>(
- input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
}
break;
}
case ir::operation::ElementwiseUnary::Type::SQRT:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
default:
auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
- values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
const auto activation = node.param().activation;
if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
throw std::runtime_error(
- "KernelGenerator(acl_neon): FullyConnected 16x1Float32 weights is not supported.");
+ "KernelGenerator(acl_neon): FullyConnected 16x1Float32 weights is not supported.");
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
::arm_compute::NEFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
- lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
}
auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
- ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
if (ifm_tensor->dimension(0) == 1)
auto activation = node.param().activation;
auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>(
- ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
- epsilon);
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
// TODO Support optional constant dimension that normalization would be performed on
const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
int32_t radius =
- 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
- float alpha = 1.0f; // In the implementation to make alpha_ become 1
- float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
- float bias = 0.0f; // Don't offset the reduction.
+ 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
+  float alpha = 1.0f; // Set to 1.0f so that alpha_ in the implementation becomes 1
+ float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
+ float bias = 0.0f; // Don't offset the reduction.
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
radius, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{
- node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
+ node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
auto radius = node.param().radius;
auto alpha = node.param().alpha;
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
- ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
+ ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
const auto axis =
- acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
}
UNUSED_RELEASE(input_type);
assert(input->info()->data_type() == acl_common::asDataType(input_type.type()));
assert(input->info()->quantization_info() ==
- ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()));
+ ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point()));
const auto pixel_value =
- ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
+ ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
auto fn =
- acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
+ acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
_return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto activation = node.param().activation;
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Permute &node)
auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
- ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
const auto frontend_layout = _current_layout;
const auto backend_layout = input_tensor->layout();
const auto reduce_axes =
- acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
+ acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
const auto reduce_type = node.param().reduce_type;
const auto keep_dims = node.param().keep_dims;
else
{
fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
- input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
- acl_common::convertReduceType(reduce_type));
+ input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
+ acl_common::convertReduceType(reduce_type));
}
_return_fn = asAclFunction(std::move(fn));
}
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
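+  // NOTE The scale options below are packed into a single ScaleKernelInfo (padding disabled)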
auto fn = acl_common::generateLayer<arm_compute::NEScale>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
- ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
- ::arm_compute::SamplingPolicy::TOP_LEFT);
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::ScaleKernelInfo{::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT, false /*use padding*/});
_return_fn = asAclFunction(std::move(fn));
}
{
const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
- hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
auto output_tensor = _tensor_reg->getAclTensor(output_index);
auto input_tensor = _tensor_reg->getAclTensor(input_index);
- // Disable applied dim_correction
- if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
- {
- // This means that high dimension's value is 1 and input tensor is applied dim_correction
- acl_common::disableDimCorrection(input_tensor);
- }
-
+ // NOTE NESoftmaxLayer's default axis is -1
auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- output_tensor->handle(), beta);
-
- // Revert disabling applied dim_correction
- if (input_tensor->dimension(0) == 1)
- {
- acl_common::disableDimCorrection(input_tensor);
- }
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
_return_fn = asAclFunction(std::move(fn));
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
assert(_ctx.at(paddings_index).data());
auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
auto fn =
- acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
+ acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
starts[axis] = begin_value;
}
auto fn = acl_common::generateLayer<arm_compute::NESlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
_return_fn = asAclFunction(std::move(fn));
}
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
starts[axis] = start_value;
const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
const auto shrink_axis_mask =
- acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
+ acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
::arm_compute::Coordinates starts_set;
::arm_compute::Coordinates ends_set;
}
// Disable applied dim_correction
- if (inputData_tensor->num_dimensions() != inputData_tensor->info()->num_dimensions())
+ if (static_cast<size_t>(inputData_tensor->getShape().rank()) !=
+ inputData_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and input tensor is applied dim_correction
acl_common::disableDimCorrection(inputData_tensor);
}
auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
- begin_mask, end_mask, shrink_axis_mask);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
// Revert disabling applied dim_correction
- if (inputData_tensor->dimension(0) == 1)
+ if (inputData_tensor->getShape().dim(0) == 1)
{
acl_common::enableDimCorrection(inputData_tensor);
}
if (node.param().padding.type == ir::PaddingType::VALID)
{
invalid_horizontal =
- ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+ ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
- ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
+ ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+ invalid_horizontal, invalid_vertical);
_return_fn = asAclFunction(std::move(fn));
}
else
{
auto backend_pv =
- acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
+ acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
ofm_tensor->handle(), backend_pv);
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
// Disable applied dim_correction
- if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
+ if (static_cast<size_t>(input_tensor->getShape().rank()) !=
+ input_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and input tensor is applied dim_correction
acl_common::disableDimCorrection(input_tensor);
}
auto fn =
- acl_common::generateLayer<arm_compute::NEUnstack>(input_tensor->handle(), outputs, axis);
+ acl_common::generateLayer<arm_compute::NEUnstack>(input_tensor->handle(), outputs, axis);
// Revert disabling applied dim_correction
- if (input_tensor->dimension(0) == 1)
+ if (input_tensor->getShape().dim(0) == 1)
{
acl_common::enableDimCorrection(input_tensor);
}
auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
- input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
_return_fn = asAclFunction(std::move(fn));
}
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
auto fn = acl_common::generateLayer<arm_compute::NEOneHot>(
- indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
- offvalue_tensor->handle(), output_tensor->handle(), axis);
+ indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+ offvalue_tensor->handle(), output_tensor->handle(), axis);
_return_fn = asAclFunction(std::move(fn));
}
#ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
-#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <backend/basic/KernelGeneratorBase.h>
#include "ir/Operands.h"
#include "TensorBuilder.h"
namespace acl_neon
{
-class KernelGenerator : public cpu_common::KernelGeneratorBase
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+private:
void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ const ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_layout;
};
} // namespace acl_neon
{
Optimizer::Optimizer(BackendContext *context)
- : _context{context},
- _tensor_builder{std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
+ : _context{context}, _tensor_builder{
+ std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
{
assert(context);
}
// Concat elimination (build subtensor info)
{
acl_common::AclSubTensorAnalyzer sa{*_context->graph()};
- for (auto op_info : _context->operation_list())
- {
- auto &op = _context->graph()->operations().at(op_info.index);
- sa.setLayout(op_info.layout);
- op.accept(sa);
- }
-
- _tensor_builder->parent_map(sa.releaseParentMap());
+ sa.setUsePadding();
+ _context->graph()->operations().iterate(
+ [&](const ir::OperationIndex &, const ir::Operation &op) {
+ sa.setLayout(_context->graph()->layout());
+ op.accept(sa);
+ });
}
}
{
using TensorBuilder =
- acl_common::AclTensorBuilder<operand::INETensor, operand::NETensor, operand::NESubTensor>;
+ acl_common::AclTensorBuilder<operand::INETensor, operand::NETensor, operand::NESubTensor>;
} // namespace acl_neon
} // namespace backend
{
using MemoryManager =
- acl_common::AclMemoryManager<operand::INETensor, operand::NETensor, operand::NESubTensor>;
+ acl_common::AclMemoryManager<operand::INETensor, operand::NETensor, operand::NESubTensor>;
using LinearMemoryManager = acl_common::AclLinearMemoryManager<
- operand::INETensor, operand::NETensor, operand::NESubTensor,
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator, ::arm_compute::MemoryGroup>;
+ operand::INETensor, operand::NETensor, operand::NESubTensor, ::arm_compute::MemoryManagerOnDemand,
+ ::arm_compute::PoolManager, ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator,
+ ::arm_compute::MemoryGroup>;
using InternalBufferManager = acl_common::AclInternalBufferManager<
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator>;
+ ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
+ ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator>;
using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
operand::NESubTensor>;
class INETensor : public acl_common::IACLTensor
{
public:
+ INETensor(size_t rank) : IACLTensor{rank} {}
const arm_compute::ITensor *handle() const override = 0;
arm_compute::ITensor *handle() override = 0;
void access(const std::function<void(ITensor &tensor)> &fn) final;
NESubTensor::NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape,
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent)
- : _ne_sub_tensor(std::make_shared<arm_compute::SubTensor>(parent->handle(), tensor_shape,
- coords, extend_parent)),
- _rank{rank}
+ : INETensor{rank}, _ne_sub_tensor(std::make_shared<arm_compute::SubTensor>(
+ parent->handle(), tensor_shape, coords, extend_parent))
{
// DO NOTHING
}
NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape,
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false);
-public:
- size_t num_dimensions() const final { return _rank; }
-
public:
const arm_compute::SubTensor *handle() const override;
arm_compute::SubTensor *handle() override;
private:
std::shared_ptr<arm_compute::SubTensor> _ne_sub_tensor;
- size_t _rank;
};
} // namespace operand
{
NETensor::NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
- : _ne_tensor(std::make_shared<arm_compute::Tensor>()), _rank{rank}, _num_uses{num_uses}
+ : INETensor{rank}, _ne_tensor(std::make_shared<arm_compute::Tensor>()), _num_uses{num_uses}
{
allocator()->init(info);
}
public:
NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
-public:
- size_t num_dimensions() const final { return _rank; }
-
public:
const arm_compute::Tensor *handle() const override;
arm_compute::Tensor *handle() override;
private:
std::shared_ptr<arm_compute::Tensor> _ne_tensor;
- size_t _rank;
size_t _num_uses;
};
#include "BackendContext.h"
#include "Config.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include <backend/Backend.h>
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<onert::backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
- auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto custom_kernel_builder = data.custom_kernel_builder;
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
context->external_context());
return context;
}
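The context creation above now receives a single ContextData bundle instead of separate graph/kernel-builder arguments. Reconstructed only from the members this change actually uses (data.graph and data.custom_kernel_builder here, _data.op_order in genKernels() below), a hedged sketch of the bundle looks roughly like:

    // Hedged sketch of ContextData, inferred from the usages in this change only.
    // The pointer kinds are assumptions and the real struct likely carries more members.
    struct ContextData
    {
      std::unique_ptr<ir::Graph> graph;                              // dereferenced as *data.graph
      std::vector<ir::OperationIndex> op_order;                      // iterated by genKernels()
      std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder; // copied before std::move(data)
    };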
#include "ir/Index.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandIndexSequence.h"
-#include "backend/cpu_common/BackendContextHelpers.h"
+#include "backend/basic/BackendContextHelpers.h"
namespace onert
{
namespace cpu
{
-void BackendContext::initConsts()
-{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
-{
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (auto index : operand_list())
- {
- if (model_io.contains(index))
- continue;
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = [&]() {
- if (obj.getUses().size() == 0)
- return ir::Layout::UNKNOWN;
- auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
- for (auto &operation_info : operation_list())
- {
- if (operation_info.index == use_op_ind)
- return operation_info.layout;
- }
- return ir::Layout::UNKNOWN;
- }();
- const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
- if (permute_factor.backend() != backend())
- continue;
- const auto backend_layout = permute_factor.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
-
- // TODO Get compiler options from compiler, and use it rather than getting it from Env
- if (util::getConfigString(util::config::EXECUTOR) == "Linear")
- {
- cpu_common::planTensors(*this, order, op_seqs, lower_info);
- }
- else
- {
- // For the executors that does not have fixed linear execution order:
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto ind : operand_list())
- {
- if (tensor_builder->isRegistered(ind))
- tensor_builder->notifyFirstUse(ind);
- }
- }
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
- tensor_builder->prepare();
-
- return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
+FunctionMap BackendContext::genKernels()
{
FunctionMap ret;
- for (auto op_seq_ind : order)
+ for (auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
}
- initConsts();
+ basic::initConsts(*this);
// NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
for (auto &it : ret)
{
#include <backend/BackendContext.h>
#include "TensorBuilder.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "ExternalContext.h"
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, _external_context(new ExternalContext)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _external_context(new ExternalContext)
{
}
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
- FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
-private:
- void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
-
public:
// TODO Make it private
std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
std::shared_ptr<KernelGenerator> kernel_gen;
private:
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
-
-#include <backend/cpu_common/ConstantInitializer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-using ConstantInitializer = cpu_common::ConstantInitializer;
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
#include <util/ConfigSource.h>
#include <ruy/context.h>
-namespace
-{
-const int kDefaultNumThreadpoolThreads = 1;
-}
-
namespace onert
{
namespace backend
class ExternalContext
{
+private:
+ static const int kDefaultNumThreadpoolThreads = 1;
+
public:
ExternalContext() : _ruy_context(new ruy::Context)
{
void setMaxNumThreads(int max_num_threads)
{
const int target_num_threads =
- max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
_ruy_context->set_max_num_threads(target_num_threads);
}
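setMaxNumThreads() treats any value above -1 as an explicit thread count and otherwise falls back to the single-thread default now kept inside the class. A small usage sketch (class and construction taken from this file; the enclosing onert::backend::cpu namespace is assumed):

    // Usage sketch for the ruy-backed ExternalContext shown above.
    onert::backend::cpu::ExternalContext ctx;
    ctx.setMaxNumThreads(4);  // ruy thread pool capped at 4 threads
    ctx.setMaxNumThreads(-1); // falls back to kDefaultNumThreadpoolThreads (= 1)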
#include "ops/PadLayer.h"
#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
+#include "ops/QuantizeLayer.h"
#include "ops/RangeLayer.h"
#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
} // namespace
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
- const std::shared_ptr<ExternalContext> &external_context)
- : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _external_context(external_context)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::operation::AddN &node)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- const auto output_index{node.getOutputs().at(0)};
-
- std::vector<const IPortableTensor *> input_tensors;
- for (auto &input_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
-
- auto output_tensor = _tensor_reg->getPortableTensor(output_index);
-
- auto fn = std::make_unique<ops::AddNLayer>();
-
- fn->configure(std::move(input_tensors), output_tensor);
-
- _return_fn = std::move(fn);
-}
+ auto ret = std::make_unique<exec::FunctionSequence>();
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- assert(!_return_fn_seq);
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->op_ind = ind;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ ret->dynamic_tensor_ctx(dyn_ctx);
}
- _current_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ for (auto ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
{
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
{
- auto portable_tensor = _tensor_reg->getPortableTensor(ind);
- if (portable_tensor)
- {
- assert(portable_tensor->layout() == ir::Layout::NHWC);
- }
-
- auto tensor = _tensor_reg->getNativeTensor(ind);
- if (tensor)
- {
- tensor->increase_ref();
- }
+ tensor->increase_ref();
}
}
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::operation::AddN &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ std::vector<const IPortableTensor *> input_tensors;
+ for (auto &input_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+
+ auto fn = std::make_unique<ops::AddNLayer>();
+
+ fn->configure(std::move(input_tensors), output_tensor);
+
+ _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);
assert(indices_tensor->data_type() == OperandType::INT32);
- assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
+ assert(axis <= static_cast<int>(indices_tensor->getShape().rank()));
auto fn = std::make_unique<ops::OneHotLayer>();
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
- auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
-
- fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
-
- _return_fn = std::move(fn);
+ if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
+ {
+ auto fn = std::make_unique<ops::QuantizeLayer>();
+ fn->configure(input_tensor, output_tensor);
+ _return_fn = std::move(fn);
+ }
+ else
+ {
+ auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
+ fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
+ _return_fn = std::move(fn);
+ }
}
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
const auto time_major = node.param().time_major;
(_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
_ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
bool has_recurrent_to_input_weights =
- _ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
+ _ctx.exist(recurrent_to_input_weights_index) &&
+ (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
// NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
// But the cell_to_input_weights does not exist in regular CIFG although peephole.
_ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
bool has_input_gate_bias =
- _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);
+ _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);
bool has_projection_weights = _ctx.exist(projection_weights_index) &&
(_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
_ctx.at(projection_weights_index).shape().dim(1) != 0);
bool has_projection_bias =
- _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);
+ _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);
auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
- ? _tensor_reg->getPortableTensor(scratch_buffer_index)
- : nullptr; // optional
+ ? _tensor_reg->getPortableTensor(scratch_buffer_index)
+ : nullptr; // optional
auto output_state_out_tensor = _ctx.exist(output_state_out_index)
- ? _tensor_reg->getPortableTensor(output_state_out_index)
- : nullptr; // optional
- auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
- ? _tensor_reg->getPortableTensor(cell_state_out_index)
+ ? _tensor_reg->getPortableTensor(output_state_out_index)
: nullptr; // optional
+ auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
+ ? _tensor_reg->getPortableTensor(cell_state_out_index)
+ : nullptr; // optional
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto input_to_input_weights_tensor =
- has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
- : nullptr; // optional
+ has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
+ : nullptr; // optional
auto input_to_forget_weights_tensor =
- _tensor_reg->getPortableTensor(input_to_forget_weights_index);
+ _tensor_reg->getPortableTensor(input_to_forget_weights_index);
auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
auto input_to_output_weights_tensor =
- _tensor_reg->getPortableTensor(input_to_output_weights_index);
+ _tensor_reg->getPortableTensor(input_to_output_weights_index);
auto recurrent_to_input_weights_tensor =
- has_recurrent_to_input_weights
- ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
- : nullptr; // optional
+ has_recurrent_to_input_weights
+ ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
+ : nullptr; // optional
auto recurrent_to_forget_weights_tensor =
- _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
+ _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
auto recurrent_to_cell_weights_tensor =
- _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
+ _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
auto recurrent_to_output_weights_tensor =
- _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
+ _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
auto cell_to_forget_weights_tensor =
- has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
- : nullptr; // optional
+ has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
+ : nullptr; // optional
auto cell_to_output_weights_tensor =
- has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
- : nullptr; // optional
+ has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
+ : nullptr; // optional
auto input_gate_bias_tensor =
- has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
+ has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);
auto projection_weights_tensor = has_projection_weights
- ? _tensor_reg->getPortableTensor(projection_weights_index)
- : nullptr; // optional
+ ? _tensor_reg->getPortableTensor(projection_weights_index)
+ : nullptr; // optional
auto projection_bias_tensor = has_projection_bias
- ? _tensor_reg->getPortableTensor(projection_bias_index)
- : nullptr; // optional
+ ? _tensor_reg->getPortableTensor(projection_bias_index)
+ : nullptr; // optional
IPortableTensor *input_layer_norm_weights_tensor = nullptr;
IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
if (node.getInputs().size() == 24)
{
const auto input_layer_norm_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
const auto forget_layer_norm_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
const auto cell_layer_norm_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
const auto output_layer_norm_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
input_layer_norm_weights_tensor =
- _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
+ _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
forget_layer_norm_weights_tensor =
- _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
+ _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
output_layer_norm_weights_tensor =
- _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
+ _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
}
auto fn = std::make_unique<ops::LSTMLayer>();
fn->configure(
- input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
- input_to_cell_weights_tensor, input_to_output_weights_tensor,
- recurrent_to_input_weights_tensor, recurrent_to_forget_weights_tensor,
- recurrent_to_cell_weights_tensor, recurrent_to_output_weights_tensor,
- cell_to_input_weights_tensor, cell_to_forget_weights_tensor, cell_to_output_weights_tensor,
- input_layer_norm_weights_tensor, forget_layer_norm_weights_tensor,
- cell_layer_norm_weights_tensor, output_layer_norm_weights_tensor,
- /*aux_input=*/nullptr,
- /*aux_input_to_input_weights=*/nullptr,
- /*aux_input_to_forget_weights=*/nullptr,
- /*aux_input_to_cell_weights=*/nullptr,
- /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
- cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
- projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
- /*forward_sequence=*/true, time_major,
- /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
- output_tensor,
- !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
- !_ctx.at(cell_state_in_index).info().isVariable());
+ input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
+ input_to_cell_weights_tensor, input_to_output_weights_tensor, recurrent_to_input_weights_tensor,
+ recurrent_to_forget_weights_tensor, recurrent_to_cell_weights_tensor,
+ recurrent_to_output_weights_tensor, cell_to_input_weights_tensor, cell_to_forget_weights_tensor,
+ cell_to_output_weights_tensor, input_layer_norm_weights_tensor,
+ forget_layer_norm_weights_tensor, cell_layer_norm_weights_tensor,
+ output_layer_norm_weights_tensor,
+ /*aux_input=*/nullptr,
+ /*aux_input_to_input_weights=*/nullptr,
+ /*aux_input_to_forget_weights=*/nullptr,
+ /*aux_input_to_cell_weights=*/nullptr,
+ /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
+ cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
+ projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
+ /*forward_sequence=*/true, time_major,
+ /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
+ output_tensor,
+ !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
+ !_ctx.at(cell_state_in_index).info().isVariable());
_return_fn = std::move(fn);
}
#include "ExternalContext.h"
#include "TensorBuilder.h"
-#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/basic/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <backend/basic/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
namespace cpu
{
-class KernelGenerator : public cpu_common::KernelGeneratorBase
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex op_ind) override;
void visit(const ir::operation::AddN &) override;
void visit(const ir::operation::ArgMinMax &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
- std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
-#include "backend/cpu_common/StaticTensorManager.h"
+#include "backend/basic/StaticTensorManager.h"
namespace onert
{
namespace cpu
{
-using StaticTensorManager = cpu_common::StaticTensorManager;
+using StaticTensorManager = basic::StaticTensorManager;
} // namespace cpu
} // namespace backend
#ifndef __ONERT_BACKEND_CPU_TENSOR_H__
#define __ONERT_BACKEND_CPU_TENSOR_H__
-#include <backend/cpu_common/Tensor.h>
+#include <backend/basic/Tensor.h>
#include <ir/Data.h>
namespace onert
namespace cpu
{
-using Tensor = cpu_common::Tensor;
-using ExternalTensor = cpu_common::ExternalTensor;
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
} // namespace cpu
} // namespace backend
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <util/logging.h>
-
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg},
- _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
- _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
-{
- /* empty */
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout layout)
-{
- _tensor_info_map.emplace(ind, info);
-
- // CPU backend supports only one layout as NHWC
- assert(layout == ir::Layout::NHWC);
- if (info.isDynamic())
- {
- _dynamic_tensor_mgr->buildTensor(ind, info, layout);
- }
- else
- {
- _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
- }
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
- const auto tensor_info = _tensor_info_map.at(ind);
-
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- const auto size = tensor_info.total_size();
- _static_tensor_mgr->claimPlan(ind, size);
- }
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
-{
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- _static_tensor_mgr->releasePlan(ind);
- }
-}
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
-
-void TensorBuilder::allocate()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
#ifndef __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
#define __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
-#include <backend/cpu_common/DynamicTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-
-#include <ir/OperandIndexMap.h>
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <unordered_map>
+#include <backend/basic/TensorBuilder.h>
namespace onert
{
namespace cpu
{
-class TensorBuilder
-{
-public:
- TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
-
- /**
- * @brief Register tensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout);
-
- void notifyFirstUse(const ir::OperandIndex &);
- void notifyLastUse(const ir::OperandIndex &);
-
- bool isRegistered(const ir::OperandIndex &) const;
-
- void prepare(void);
- void allocate();
- void postFunctionPrepare() { /* DO NOTHING */}
-
- IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
-
-private:
- const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
-};
+using TensorBuilder = basic::TensorBuilder;
} // namespace cpu
} // namespace backend
std::vector<const int32_t *> input_buffers(input_size);
for (size_t i = 0; i < input_size; i++)
{
- input_buffers[i] = reinterpret_cast<int32_t *>(_inputs[i]->buffer());
+ input_buffers[i] = getBuffer<int32_t>(_inputs[i]);
}
- AddN(getTensorShape(_inputs[0]), input_size, input_buffers.data(),
- reinterpret_cast<int32_t *>(_output->buffer()));
+ AddN(getShape(_inputs[0]), input_size, input_buffers.data(), getBuffer<int32_t>(_output));
}
else if (_output->data_type() == ir::DataType::FLOAT32)
{
std::vector<const float *> input_buffers(input_size);
for (size_t i = 0; i < input_size; i++)
{
- input_buffers[i] = reinterpret_cast<float *>(_inputs[i]->buffer());
+ input_buffers[i] = getBuffer<float>(_inputs[i]);
}
- AddN(getTensorShape(_inputs[0]), input_size, input_buffers.data(),
- reinterpret_cast<float *>(_output->buffer()));
+ AddN(getShape(_inputs[0]), input_size, input_buffers.data(), getBuffer<float>(_output));
}
else
{
return std::less<T>();
}
}
-}
+} // namespace
void ArgMinMaxLayer::configure(const IPortableTensor *input, IPortableTensor *output,
const IPortableTensor *axis, bool is_arg_max)
{
throw std::runtime_error("ArgMinMax: wrong shape of axis");
}
- auto axis = *reinterpret_cast<const int32_t *>(_axis->buffer());
+ auto axis = *getBuffer<int32_t>(_axis);
if (axis < 0)
{
- axis += _input->num_dimensions();
+ axis += _input->getShape().rank();
}
-#define TF_LITE_ARG_MIN_MAX(input_type, axis_type, output_type) \
- ArgMinMax(getTensorShape(_input), reinterpret_cast<const input_type *>(_input->buffer()), \
- getTensorShape(_output), reinterpret_cast<output_type *>(_output->buffer()), axis, \
- GetComparefunction<input_type>(_is_arg_max));
+#define TF_LITE_ARG_MIN_MAX(input_type, axis_type, output_type) \
+ ArgMinMax(getShape(_input), getBuffer<input_type>(_input), getShape(_output), \
+ getBuffer<output_type>(_output), axis, GetComparefunction<input_type>(_is_arg_max));
if (_output->data_type() == ir::DataType::INT32)
{
switch (_input->data_type())
{
BatchMatMulLayer::BatchMatMulLayer()
- : _lhs(nullptr), _rhs(nullptr), _output(nullptr), _adj_x(false), _adj_y(false),
- _kernel(new nnfw::cker::BatchMatMul())
+ : _lhs(nullptr), _rhs(nullptr), _output(nullptr), _adj_x(false), _adj_y(false),
+ _kernel(new nnfw::cker::BatchMatMul())
{
// DO NOTHING
}
void BatchMatMulLayer::batchMatMulFloat32()
{
nnfw::cker::BatchMatMul &batchmatmul_kernel = *_kernel;
- nnfw::cker::Shape lhs_shape = getTensorShape(_lhs);
- nnfw::cker::Shape rhs_shape = getTensorShape(_rhs);
- nnfw::cker::Shape output_shape = getTensorShape(_output);
+ nnfw::cker::Shape lhs_shape = getShape(_lhs);
+ nnfw::cker::Shape rhs_shape = getShape(_rhs);
+ nnfw::cker::Shape output_shape = getShape(_output);
// TODO implement for constant input
batchmatmul_kernel.prepare(lhs_shape, rhs_shape, _adj_x, _adj_y);
- batchmatmul_kernel(lhs_shape, reinterpret_cast<const float *>(_lhs->buffer()), rhs_shape,
- reinterpret_cast<const float *>(_rhs->buffer()), _adj_x, _adj_y, output_shape,
- reinterpret_cast<float *>(_output->buffer()));
+ batchmatmul_kernel(lhs_shape, getBuffer<float>(_lhs), rhs_shape, getBuffer<float>(_rhs), _adj_x,
+ _adj_y, output_shape, getBuffer<float>(_output));
}
void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, bool adj_x,
{
BatchToSpaceNDLayer::BatchToSpaceNDLayer()
- : _input(nullptr), _output(nullptr), _block_shape(nullptr), _crops(nullptr)
+ : _input(nullptr), _output(nullptr), _block_shape(nullptr), _crops(nullptr)
{
// DO NOTHING
}
}
else
{
- _crops_buffer = reinterpret_cast<const int32_t *>(_crops->buffer());
+ _crops_buffer = getBuffer<int32_t>(_crops);
}
- nnfw::cker::BatchToSpaceND<T>(
- getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
- reinterpret_cast<const int32_t *>(_block_shape->buffer()), _crops_buffer,
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::BatchToSpaceND<T>(getShape(_input), getBuffer<T>(_input),
+ getBuffer<int32_t>(_block_shape), _crops_buffer, getShape(_output),
+ getBuffer<T>(_output));
}
void BatchToSpaceNDLayer::configure(const IPortableTensor *input, IPortableTensor *output,
Eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
nnfw::cker::BinaryArithmeticOpParam op_params)
- : _op_params(std::move(op_params)), _need_broadcast(false)
+ : _op_params(std::move(op_params)), _need_broadcast(false)
{
if (!output->is_dynamic())
updateCache(lhs, rhs, output);
void updateCache(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
{
- _lhs_shape.ReplaceWith(getTensorShape(lhs));
- _rhs_shape.ReplaceWith(getTensorShape(rhs));
- _output_shape.ReplaceWith(getTensorShape(output));
+ _lhs_shape.ReplaceWith(getShape(lhs));
+ _rhs_shape.ReplaceWith(getShape(rhs));
+ _output_shape.ReplaceWith(getShape(output));
_need_broadcast = nnfw::cker::ProcessBroadcastShapes(_lhs_shape, _rhs_shape, &_op_params);
}
if (output->is_dynamic())
updateCache(lhs, rhs, output);
else
- assert(_lhs_shape == getTensorShape(lhs) && _rhs_shape == getTensorShape(rhs) &&
- _output_shape == getTensorShape(output));
- auto lhs_buffer = reinterpret_cast<const T *>(lhs->buffer());
- auto rhs_buffer = reinterpret_cast<const T *>(rhs->buffer());
- auto output_buffer = reinterpret_cast<T *>(output->buffer());
+ assert(_lhs_shape == getShape(lhs) && _rhs_shape == getShape(rhs) &&
+ _output_shape == getShape(output));
+ auto lhs_buffer = getBuffer<T>(lhs);
+ auto rhs_buffer = getBuffer<T>(rhs);
+ auto output_buffer = getBuffer<T>(output);
if (_need_broadcast)
{
nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
- _op_params, _lhs_shape, lhs_buffer, _rhs_shape, rhs_buffer, _output_shape, output_buffer);
+ _op_params, _lhs_shape, lhs_buffer, _rhs_shape, rhs_buffer, _output_shape, output_buffer);
}
else
{
nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
- _op_params, _lhs_shape, lhs_buffer, _rhs_shape, rhs_buffer, _output_shape, output_buffer);
+ _op_params, _lhs_shape, lhs_buffer, _rhs_shape, rhs_buffer, _output_shape, output_buffer);
}
}
};
nnfw::cker::BinaryArithmeticOpParam *params)
{
int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ CalculateActivationRangeQuantized(activation, output, &output_activation_min,
+ &output_activation_max);
nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
op_params.quantized_activation_max = output_activation_max;
op_params.quantized_activation_min = output_activation_min;
// Parameters for scaled quantized computation
op_params.left_shift = 20;
// Zero-points of input and output tensors
- op_params.input1_offset = -lhs->data_offset();
- op_params.input2_offset = -rhs->data_offset();
- op_params.output_offset = output->data_offset();
- assert((op_params.input1_offset <= 0) && (op_params.input1_offset >= -255));
- assert((op_params.input2_offset <= 0) && (op_params.input2_offset >= -255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
+ op_params.input1_offset = -lhs->data_zero_point();
+ op_params.input2_offset = -rhs->data_zero_point();
+ op_params.output_offset = output->data_zero_point();
// Compute normalized scale for _lhs and _rhs values,
// and represent in 32-bit fixed point
const double real_rhs_scale = rhs->data_scale() / norm_max_scale;
// output scale is used to normalize final result, so we invert the scale here
const double real_output_scale =
- norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
+ norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
// Represent the scales as fixed int32_t multipliers, and int32_t shifts
QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
nnfw::cker::BinaryArithmeticOpParam *params)
{
int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ CalculateActivationRangeQuantized(activation, output, &output_activation_min,
+ &output_activation_max);
nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
op_params.quantized_activation_max = output_activation_max;
op_params.quantized_activation_min = output_activation_min;
- op_params.input1_offset = -lhs->data_offset();
- op_params.input2_offset = -rhs->data_offset();
- op_params.output_offset = output->data_offset();
+ op_params.input1_offset = -lhs->data_zero_point();
+ op_params.input2_offset = -rhs->data_zero_point();
+ op_params.output_offset = output->data_zero_point();
double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale();
QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
{
setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
_kernel =
- Eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>(_lhs, _rhs, _output, op_params);
+ Eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>(_lhs, _rhs, _output, op_params);
}
+ else if (_lhs->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
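+ // Reuses the uint8 add/sub quantization parameters; only the Eval element type differs.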
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::ADD, int8_t>(_lhs, _rhs, _output, op_params);
+ }
else
{
_kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(
- _lhs, _rhs, _output, activation, op_params);
+ _lhs, _rhs, _output, activation, op_params);
}
break;
case ArithmeticType::kSub:
setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
op_params.input2_multiplier *= -1;
_kernel =
- Eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>(_lhs, _rhs, _output, op_params);
+ Eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>(_lhs, _rhs, _output, op_params);
+ }
+ else if (_lhs->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ op_params.input2_multiplier *= -1;
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::SUB, int8_t>(_lhs, _rhs, _output, op_params);
}
else
{
_kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(
- _lhs, _rhs, _output, activation, op_params);
+ _lhs, _rhs, _output, activation, op_params);
}
break;
case ArithmeticType::kMul:
nnfw::cker::BinaryArithmeticOpParam op_params;
setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
_kernel =
- Eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>(_lhs, _rhs, _output, op_params);
+ Eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>(_lhs, _rhs, _output, op_params);
+ }
+ else if (_lhs->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::MUL, int8_t>(_lhs, _rhs, _output, op_params);
}
else
{
_kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(
- _lhs, _rhs, _output, activation, op_params);
+ _lhs, _rhs, _output, activation, op_params);
}
break;
case ArithmeticType::kDiv:
if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
throw std::runtime_error{
- "BinaryArithmetic(Div): Div operation does not support quantization"};
+ "BinaryArithmetic(Div): Div operation does not support quantization"};
}
else if (_lhs->data_type() == OperandType::INT32)
{
else
{
_kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(
- _lhs, _rhs, _output, activation, op_params);
+ _lhs, _rhs, _output, activation, op_params);
}
break;
default:
{
// ToDo : It need to support INT8 and UINT8 also when will be applied quantization.
case OperandType::FLOAT32:
- nnfw::cker::BroadcastTo<float>(
- getTensorShape(_input), reinterpret_cast<float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::BroadcastTo<float>(getShape(_input), reinterpret_cast<float *>(_input->buffer()),
+ getShape(_output), getBuffer<float>(_output));
break;
case OperandType::INT32:
- nnfw::cker::BroadcastTo<int32_t>(
- getTensorShape(_input), reinterpret_cast<int32_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
+ nnfw::cker::BroadcastTo<int32_t>(getShape(_input),
+ reinterpret_cast<int32_t *>(_input->buffer()),
+ getShape(_output), getBuffer<int32_t>(_output));
break;
case OperandType::UINT32:
- nnfw::cker::BroadcastTo<uint32_t>(
- getTensorShape(_input), reinterpret_cast<uint32_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint32_t *>(_output->buffer()));
+ nnfw::cker::BroadcastTo<uint32_t>(getShape(_input),
+ reinterpret_cast<uint32_t *>(_input->buffer()),
+ getShape(_output), getBuffer<uint32_t>(_output));
break;
default:
throw std::runtime_error{"BroadcastToLayer: unsupported data type"};
{
nnfw::cker::ComparisonParams params;
params.left_shift = 8;
- params.input1_offset = -lhs->data_offset();
- params.input2_offset = -rhs->data_offset();
+ params.input1_offset = -lhs->data_zero_point();
+ params.input2_offset = -rhs->data_zero_point();
const double norm_max_scale =
- 2 * std::max(std::abs(lhs->data_scale()), std::abs(rhs->data_scale()));
+ 2 * std::max(std::abs(lhs->data_scale()), std::abs(rhs->data_scale()));
const double adjusted_lhs_scale = lhs->data_scale() / norm_max_scale;
const double adjusted_rhs_scale = rhs->data_scale() / norm_max_scale;
QuantizeMultiplierSmallerThanOneExp(adjusted_lhs_scale, ¶ms.input1_multiplier,
¶ms.input2_shift);
params.is_broadcast = !HaveSameShapes(lhs, rhs);
- using CompareFunction =
- void (*)(ComparisonParams & params, const Shape &input1_shape, const T *input1_data,
- const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
- bool *output_data);
+ using CompareFunction = void (*)(
+ ComparisonParams & params, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, bool *output_data);
static const CompareFunction broadcast_fns[] = {
- Broadcast4DSlowEqualWithScaling, Broadcast4DSlowNotEqualWithScaling,
- Broadcast4DSlowGreaterWithScaling, Broadcast4DSlowGreaterEqualWithScaling,
- Broadcast4DSlowLessWithScaling, Broadcast4DSlowLessEqualWithScaling,
+ Broadcast4DSlowEqualWithScaling, Broadcast4DSlowNotEqualWithScaling,
+ Broadcast4DSlowGreaterWithScaling, Broadcast4DSlowGreaterEqualWithScaling,
+ Broadcast4DSlowLessWithScaling, Broadcast4DSlowLessEqualWithScaling,
};
static const CompareFunction non_broadcast_fns[] = {
- EqualWithScaling, NotEqualWithScaling, GreaterWithScaling,
- GreaterEqualWithScaling, LessWithScaling, LessEqualWithScaling,
+ EqualWithScaling, NotEqualWithScaling, GreaterWithScaling,
+ GreaterEqualWithScaling, LessWithScaling, LessEqualWithScaling,
};
static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
CompareFunction fn = (params.is_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
- fn(params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+ fn(params, getExtendedTensorShape(lhs), getBuffer<T>(lhs), getExtendedTensorShape(rhs),
+ getBuffer<T>(rhs), getExtendedTensorShape(output), getBuffer<bool>(output));
}
template <typename T>
bool requires_broadcast = !HaveSameShapes(lhs, rhs);
using CompareFunction =
- void (*)(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape,
- const T *input2_data, const Shape &output_shape, bool *output_data);
+ void (*)(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape,
+ const T *input2_data, const Shape &output_shape, bool *output_data);
static const CompareFunction broadcast_fns[] = {
- Broadcast4DSlowEqual, Broadcast4DSlowNotEqual, Broadcast4DSlowGreater,
- Broadcast4DSlowGreaterEqual, Broadcast4DSlowLess, Broadcast4DSlowLessEqual,
+ Broadcast4DSlowEqual, Broadcast4DSlowNotEqual, Broadcast4DSlowGreater,
+ Broadcast4DSlowGreaterEqual, Broadcast4DSlowLess, Broadcast4DSlowLessEqual,
};
static const CompareFunction non_broadcast_fns[] = {
- EqualNoScaling, NotEqualNoScaling, GreaterNoScaling,
- GreaterEqualNoScaling, LessNoScaling, LessEqualNoScaling,
+ EqualNoScaling, NotEqualNoScaling, GreaterNoScaling,
+ GreaterEqualNoScaling, LessNoScaling, LessEqualNoScaling,
};
static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
CompareFunction fn = (requires_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
- fn(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+ fn(getExtendedTensorShape(lhs), getBuffer<T>(lhs), getExtendedTensorShape(rhs), getBuffer<T>(rhs),
+ getExtendedTensorShape(output), getBuffer<bool>(output));
}
} // namespace
CompareLayer::CompareLayer()
- : _lhs(nullptr), _rhs(nullptr), _output(nullptr),
- _op_type(ir::operation::Comparison::ComparisonType::Equal)
+ : _lhs(nullptr), _rhs(nullptr), _output(nullptr),
+ _op_type(ir::operation::Comparison::ComparisonType::Equal)
{
// DO NOTHING
}
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims.push_back(getTensorShape(_inputs[i]));
+ inputDims.push_back(getShape(_inputs[i]));
inputDimsPtr.push_back(&inputDims[i]);
}
for (const auto input : _inputs)
{
- inputDataPtrs.emplace_back(reinterpret_cast<const T *>(input->buffer()));
+ inputDataPtrs.emplace_back(getBuffer<T>(input));
}
nnfw::cker::Concatenation<T>(op_params, inputDimsPtr.data(), inputDataPtrs.data(),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ getShape(_output), getBuffer<T>(_output));
}
void ConcatLayer::concatenationQuant8()
{
std::vector<float> input_scales(num_inputs);
for (uint32_t i = 0; i < num_inputs; i++)
{
- input_zeropoints[i] = _inputs[i]->data_offset();
+ input_zeropoints[i] = _inputs[i]->data_zero_point();
input_scales[i] = _inputs[i]->data_scale();
}
op_params.inputs_count = num_inputs;
op_params.input_zeropoint = input_zeropoints.data();
op_params.input_scale = input_scales.data();
- op_params.output_zeropoint = _output->data_offset();
+ op_params.output_zeropoint = _output->data_zero_point();
op_params.output_scale = _output->data_scale();
std::vector<nnfw::cker::Shape *> inputDimsPtr;
inputDims.reserve(num_inputs);
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims.push_back(getTensorShape(_inputs[i]));
+ inputDims.push_back(getShape(_inputs[i]));
inputDimsPtr.push_back(&inputDims[i]);
}
std::vector<const uint8_t *> inputDataPtrs;
for (const auto input : _inputs)
{
- inputDataPtrs.emplace_back(reinterpret_cast<const uint8_t *>(input->buffer()));
+ inputDataPtrs.emplace_back(getBuffer<uint8_t>(input));
}
nnfw::cker::ConcatenationWithScaling(op_params, inputDimsPtr.data(), inputDataPtrs.data(),
- getTensorShape(_output),
- reinterpret_cast<uint8_t *>(_output->buffer()));
+ getShape(_output), getBuffer<uint8_t>(_output));
}
void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs, int32_t axis,
*/
#include "ConvolutionLayer.h"
+#include "OperationUtils.h"
#include "../Tensor.h"
#include "ir/Padding.h"
namespace ops
{
ConvolutionLayer::ConvolutionLayer()
- : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
- _dilationHeightFactor(1), _activation(ir::Activation::NONE),
- _conv_kernel(new nnfw::cker::Conv()), _prepare(false)
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+ _conv_kernel(new nnfw::cker::Conv()), _prepare(false)
{
// DO NOTHING
}
op_params.float_activation_max = output_activation_max;
nnfw::cker::Conv &kernel = *_conv_kernel;
- kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ kernel(op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
+ getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
+ getBuffer<float>(_output));
}
void ConvolutionLayer::convQuant8()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
double real_multiplier = 0.0;
int32_t output_multiplier = 0;
op_params.padding_type = getPaddingType(_paddingType);
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
- op_params.input_offset = -_input->data_offset();
- op_params.weights_offset = -_kernel->data_offset();
- op_params.output_offset = _output->data_offset();
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = -_kernel->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.quantized_activation_min = output_activation_min;
op_params.is_replaced_weights = true;
nnfw::cker::Conv &kernel = *_conv_kernel;
- kernel(op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ kernel(op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
+ getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
+ getBuffer<uint8_t>(_output));
+}
+
+void ConvolutionLayer::convQuant8PerChannel()
+{
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+
+ nnfw::cker::ConvParams op_params;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ op_params.stride_height = _strideHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.padding_values.height = _paddingTop;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
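+ // NOTE The per-channel output multipliers/shifts used here are precomputed in prepare()
+ // (GetQuantizedConvolutionMultipliersAndShifts) and stored inside the cker Conv kernel.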
+ nnfw::cker::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getShape(_input), getBuffer<int8_t>(_input), getShape(_kernel),
+ getBuffer<int8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
+ getBuffer<int8_t>(_output));
}
void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
param_padding.param.bottom = _paddingBottom;
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- _dilationWidthFactor, _dilationHeightFactor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
_paddingLeft = padding.left;
_paddingRight = padding.right;
{
convQuant8();
}
+ else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ convQuant8PerChannel();
+ }
else
{
throw std::runtime_error{"Conv: unsupported data type"};
if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
{
bool is_transposed = false;
- kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getPaddingType(_paddingType), is_transposed, _dilationWidthFactor,
- _dilationHeightFactor);
+ kernel.prepare(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType),
+ is_transposed, _dilationWidthFactor, _dilationHeightFactor);
// Decrease reference of _kernel(weights) only when _kernel is constant
if (is_transposed)
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
!_input->is_dynamic() && !_output->is_dynamic())
{
- kernel.prepareQuant(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output),
- _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ kernel.prepareQuant(getShape(_input), getShape(_kernel), getShape(_output), _strideWidth,
+ _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ }
+ else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
+ {
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(0),
+ kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift());
+ }
+ else
+ {
+ throw std::runtime_error{"Conv2D: Int8 dynamic weight is not supported"};
+ }
}
_prepare = true;
}
void convQuant8();
+ void convQuant8PerChannel();
+
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, ir::PaddingType _paddingType,
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
template <typename T> void DepthToSpaceLayer::depthToSpace()
{
- nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()),
- _block_size);
+ nnfw::cker::DepthToSpace(getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output), _block_size);
}
void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size,
op_params.float_activation_max = output_activation_max;
nnfw::cker::DepthwiseConv<float, float>(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
+ getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
+ getBuffer<float>(_output), _external_context->ruy_context());
}
void DepthwiseConvolutionLayer::convQuant8()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
double real_multiplier = 0.0;
int32_t output_multiplier = 0;
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
op_params.depth_multiplier = _multiplier;
- op_params.input_offset = -_input->data_offset();
- op_params.weights_offset = -_kernel->data_offset();
- op_params.output_offset = _output->data_offset();
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = -_kernel->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
- op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()),
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
+ getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
+ getBuffer<uint8_t>(_output), _external_context->ruy_context());
+}
+
+void DepthwiseConvolutionLayer::convQuant8PerChannel()
+{
+ if (!_prepared)
+ {
+ prepareQuant8PerChannel();
+ _prepared = true;
+ }
+
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.padding_type = nnfw::cker::PaddingType::kSame;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.depth_multiplier = _multiplier;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidth;
+ op_params.dilation_height_factor = _dilationHeight;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = 0;
+ op_params.output_offset = _output->data_zero_point();
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
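+ // The multiplier/shift vectors were filled by prepareQuant8PerChannel() before the first run.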
+ nnfw::cker::optimized_integer_ops::DepthwiseConvPerChannel(
+ op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
+ getShape(_input), getBuffer<int8_t>(_input), getShape(_kernel), getBuffer<int8_t>(_kernel),
+ getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output), getBuffer<int8_t>(_output),
+ _external_context->ruy_context());
+}
+
+void DepthwiseConvolutionLayer::prepareQuant8PerChannel()
+{
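+ // Depthwise weights are laid out as [1, kernel_h, kernel_w, output_channels]; Dims(3) is the channel count.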
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
+ _per_channel_output_shift);
}
void DepthwiseConvolutionLayer::configure(
- const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
- const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
- const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
- const ir::Activation activation, IPortableTensor *output,
- const std::shared_ptr<ExternalContext> &external_context)
+ const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_kernel = kernel;
_activation = activation;
_output = output;
_external_context = external_context;
+
+ if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
+ {
+ prepareQuant8PerChannel();
+ _prepared = true;
+ }
+ }
}
void DepthwiseConvolutionLayer::run()
{
convQuant8();
}
+ else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ convQuant8PerChannel();
+ }
else
{
throw std::runtime_error{"DepthwiseConv: unsupported data type"};
void convQuant8();
+ void convQuant8PerChannel();
+
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, const uint32_t paddingLeft,
const uint32_t paddingRight, const uint32_t paddingTop,
void run() override;
+private:
+ void prepareQuant8PerChannel();
+
private:
const IPortableTensor *_input{nullptr};
const IPortableTensor *_kernel{nullptr};
ir::Activation _activation{ir::Activation::NONE};
std::shared_ptr<ExternalContext> _external_context;
+
+ bool _prepared{false};
+
+ // Per channel output multiplier and shift.
+ std::vector<int32_t> _per_channel_output_multiplier;
+ std::vector<int> _per_channel_output_shift;
};
} // namespace ops
{
EinsumLayer::EinsumLayer()
- : _inputs(), _output(nullptr), _equation(), _einsum_kernel(new nnfw::cker::Einsum())
+ : _inputs(), _output(nullptr), _equation(), _einsum_kernel(new nnfw::cker::Einsum())
{
// DO NOTHING
}
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputShapes.emplace_back(getTensorShape(_inputs[i]));
- inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(_inputs[i]->buffer()));
+ inputShapes.emplace_back(getShape(_inputs[i]));
+ inputFloatPtrs.emplace_back(getBuffer<float>(_inputs[i]));
}
- kernel(_equation, inputShapes, inputFloatPtrs, getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ kernel(_equation, inputShapes, inputFloatPtrs, getShape(_output), getBuffer<float>(_output));
}
void EinsumLayer::run()
{
ElementwiseActivationLayer::ElementwiseActivationLayer()
- : _input(nullptr), _output(nullptr), _kernel()
+ : _input(nullptr), _output(nullptr), _kernel()
{
// DO NOTHING
}
void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type)
{
const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
+ const auto input_zero_point = static_cast<int32_t>(_input->data_zero_point());
const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
+ const auto output_zero_point = static_cast<int32_t>(_output->data_zero_point());
const float inverse_scale = 1 / output_scale;
int32_t maxval = std::numeric_limits<uint8_t>::max();
int32_t minval = std::numeric_limits<uint8_t>::min();
void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input,
IPortableTensor *output)
{
- const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer());
+ const int size = MatchingFlatSize(getShape(input), getShape(output));
+ const uint8_t *input_data = getBuffer<uint8_t>(input);
+ uint8_t *output_data = getBuffer<uint8_t>(output);
for (int i = 0; i < size; ++i)
{
if (input->data_type() == OperandType::FLOAT32)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::ELU(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::ELU(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else
else if (_input->data_type() == OperandType::FLOAT32)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::Logistic(getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Logistic(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else
if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::ReLU(getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::ReLU(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else if (alpha == 6.f && beta == 0.f)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::ReLU6(getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::ReLU6(getShape(input), getBuffer<float>(input), getBuffer<float>(output));
};
}
else
{
throw std::runtime_error(
- "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
+ "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
}
}
else
else if (_input->data_type() == OperandType::FLOAT32)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Tanh(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else
if (_input->data_type() == OperandType::FLOAT32)
{
_kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output),
- reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getShape(input),
+ getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else
{
if (!HaveSameShapes(lhs, rhs))
{
- nnfw::cker::LogicalAndBroadcast<T>(
- getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::LogicalAndBroadcast<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs),
+ getBuffer<T>(rhs), getShape(output), getBuffer<T>(output));
}
else
{
- nnfw::cker::LogicalAndElementwise<T>(
- getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::LogicalAndElementwise<T>(getShape(lhs), getBuffer<T>(lhs), getBuffer<T>(rhs),
+ getBuffer<T>(output));
}
}
{
if (!HaveSameShapes(lhs, rhs))
{
- nnfw::cker::LogicalOrBroadcast<T>(
- getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::LogicalOrBroadcast<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs),
+ getBuffer<T>(rhs), getShape(output), getBuffer<T>(output));
}
else
{
- nnfw::cker::LogicalOrElementwise<T>(
- getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::LogicalOrElementwise<T>(getShape(lhs), getBuffer<T>(lhs), getBuffer<T>(rhs),
+ getBuffer<T>(output));
}
}
template <typename T>
void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
{
- nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::Max<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs), getBuffer<T>(rhs),
+ getShape(output), getBuffer<T>(output));
}
template <typename T>
void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
{
- nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::Min<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs), getBuffer<T>(rhs),
+ getShape(output), getBuffer<T>(output));
}
bool haveSameQauntInfo(const IPortableTensor *lhs, const IPortableTensor *rhs,
const IPortableTensor *output)
{
return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) &&
- (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset());
+ (lhs->data_zero_point() == rhs->data_zero_point() &&
+ lhs->data_zero_point() == output->data_zero_point());
}
} // namespace
#include <cker/operation/Erf.h>
#include <cker/operation/Exp.h>
#include <cker/operation/LogicalNot.h>
-#include <cker/operation/Quantize.h>
#include <cker/operation/Round.h>
namespace onert
{
void absFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Abs(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
template <typename FromT>
const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
auto out = *reinterpret_cast<DataPtr *>(&output_buf);
- auto input_shape = getTensorShape(input);
- auto output_shape = getTensorShape(output);
+ auto input_shape = getShape(input);
+ auto output_shape = getShape(output);
const auto num_elements = MatchingFlatSize(input_shape, output_shape);
switch (input->data_type())
void cosFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Cos(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void dequantizeInt8(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Dequantize(getTensorShape(input), reinterpret_cast<const int8_t *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()),
- input->data_scale(), input->data_offset());
+ nnfw::cker::Dequantize(getShape(input), getBuffer<int8_t>(input), getShape(output),
+ getBuffer<float>(output), input->data_scale(), input->data_zero_point());
}
void dequantizeUint8(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Dequantize(getTensorShape(input), reinterpret_cast<const uint8_t *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()),
- input->data_scale(), input->data_offset());
+ nnfw::cker::Dequantize(getShape(input), getBuffer<uint8_t>(input), getShape(output),
+ getBuffer<float>(output), input->data_scale(), input->data_zero_point());
}
void expFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Exp(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void erfFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Erf(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void floorFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Floor(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Floor(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void logFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Log(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void logicalNot(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+ nnfw::cker::LogicalNot(getShape(input), getBuffer<bool>(input), getShape(output),
+ getBuffer<bool>(output));
}
template <typename T> void neg(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Neg<T>(getTensorShape(input), reinterpret_cast<const T *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
-}
-
-template <typename InputT, typename OutputT>
-void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
-{
- nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()),
- output->data_scale(), output->data_offset());
+ nnfw::cker::Neg<T>(getShape(input), getBuffer<T>(input), getShape(output), getBuffer<T>(output));
}
void roundFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Round(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Rsqrt(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Sin(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Sqrt(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void squareFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Square(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Square(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
if (!HaveSameShapes(input, output))
throw std::runtime_error{"ZerosLike: input and output shape don't match."};
- auto element_size = getTensorShape(input).FlatSize();
+ auto element_size = getShape(input).FlatSize();
- memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T));
+ memset(getBuffer<T>(output), 0, element_size * sizeof(T));
}
} // namespace
throw std::runtime_error{"Neg: Unsupported data type"};
}
break;
- case ElementwiseUnaryType::kQuantize:
- if ((input->data_type() == OperandType::FLOAT32))
- {
- _kernel = affineQuantize<float, uint8_t>;
- }
- else
- {
- throw std::runtime_error{"Quantize: Unsupported data type"};
- }
- break;
case ElementwiseUnaryType::kRound:
if ((input->data_type() == OperandType::FLOAT32))
{
}
break;
default:
- throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+ throw std::runtime_error{"ElementwiseUnary: Unsupported ElementwiseUnary type"};
}
}
switch (_output->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Fill<float *>(reinterpret_cast<float *>(_value->buffer()),
- getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Fill<float>(getBuffer<float>(_value), getShape(_output),
+ getBuffer<float>(_output));
break;
case OperandType::INT32:
- nnfw::cker::Fill<int32_t *>(reinterpret_cast<int32_t *>(_value->buffer()),
- getTensorShape(_output),
- reinterpret_cast<int32_t *>(_output->buffer()));
+ nnfw::cker::Fill<int32_t>(getBuffer<int32_t>(_value), getShape(_output),
+ getBuffer<int32_t>(_output));
break;
case OperandType::INT64:
- nnfw::cker::Fill<int64_t *>(reinterpret_cast<int64_t *>(_value->buffer()),
- getTensorShape(_output),
- reinterpret_cast<int64_t *>(_output->buffer()));
+ nnfw::cker::Fill<int64_t>(getBuffer<int64_t>(_value), getShape(_output),
+ getBuffer<int64_t>(_output));
break;
case OperandType::UINT32:
- nnfw::cker::Fill<uint32_t *>(reinterpret_cast<uint32_t *>(_value->buffer()),
- getTensorShape(_output),
- reinterpret_cast<uint32_t *>(_output->buffer()));
+ nnfw::cker::Fill<uint32_t>(getBuffer<uint32_t>(_value), getShape(_output),
+ getBuffer<uint32_t>(_output));
break;
default:
throw std::runtime_error{"Fill: unsupported data type"};
{
FullyConnectedLayer::FullyConnectedLayer()
- : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
- _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
- _external_context(nullptr), _is_hybrid(false), _is_shuffled16x1float32(false)
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
+ _external_context(nullptr), _is_hybrid(false), _is_shuffled16x1float32(false)
{
// DO NOTHING
}
nnfw::cker::FullyConnectedParams op_params;
op_params.activation = convertActivationType(_activation);
- nnfw::cker::FullyConnected(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_weights), getBuffer<float>(_weights), getShape(_bias),
+ _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
+ getBuffer<float>(_output));
}
// executionMutex is used to protect concurrent access of non-threadsafe resources
int32_t output_activation_max = 0;
GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
nnfw::cker::FullyConnectedParams op_params;
- op_params.input_offset = -_input->data_offset();
- op_params.weights_offset = -_weights->data_offset();
- op_params.output_offset = _output->data_offset();
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = -_weights->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
- nnfw::cker::FullyConnected(
- op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const uint8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<uint8_t>(_input),
+ getShape(_weights), getBuffer<uint8_t>(_weights), getShape(_bias),
+ _bias ? getBuffer<int32_t>(_bias) : nullptr, getShape(_output),
+ getBuffer<uint8_t>(_output));
}
void FullyConnectedLayer::fullyConnectedHybrid()
nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
if (!temp_arena.prepared)
{
- temp_arena.prepare(getTensorShape(_input), getTensorShape(_weights));
+ temp_arena.prepare(getShape(_input), getShape(_weights));
}
nnfw::cker::FullyConnectedParams op_params;
#ifndef USE_RUY_GEMV
nnfw::cker::FullyConnectedHybrid(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<int8_t>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
#else
nnfw::cker::FullyConnectedHybrid(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights),
- (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
- : reinterpret_cast<const int8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
+ : getBuffer<int8_t>(_weights),
+ getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
+ getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
if (_cached_weights == nullptr || _is_weights_freed)
return;
// If the input's elements are all zero, the ruy kernel path is bypassed (never entered),
// so handle that case here.
- const int input_size = getTensorShape(_input).FlatSize();
- if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
+ const int input_size = getShape(_input).FlatSize();
+ if (nnfw::cker::IsZeroVector(getBuffer<float>(_input), input_size))
return;
auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
tensor->decrease_ref();
if (tensor->buffer() == nullptr) // ref == 0?
{
+#if defined(__ANDROID__) && (__ANDROID_API__ >= 26)
+ // NOTE This call forces the OS to release any unused memory immediately
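+ // (M_PURGE is an Android bionic mallopt() option; this assumes <malloc.h> is included by this file)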
+ mallopt(M_PURGE, 0);
+#endif
_is_weights_freed = true;
}
#endif
if (block_size.size() == 0)
{
nnfw::cker::FullyConnectedSparseWeightRandom(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
- w1_indices);
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
}
else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
{
nnfw::cker::FullyConnectedSparseWeight16x1(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w1_segments,
- w1_indices);
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
}
else
throw std::runtime_error{"FullyConnected: unsupported sparsity"};
nnfw::cker::FullyConnectedParams op_params;
op_params.activation = convertActivationType(_activation);
- nnfw::cker::FullyConnected16x1Float32(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::FullyConnected16x1Float32(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_weights), getBuffer<float>(_weights),
+ getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output));
#else
throw std::runtime_error{"FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
#endif
if (_is_shuffled16x1float32)
{
throw std::runtime_error{
- "FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
+ "FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
}
#endif
_external_context = external_context;
{
if (_bias && _bias->is_constant())
{
- const int bias_size = getTensorShape(_bias).FlatSize();
- if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ const int bias_size = getShape(_bias).FlatSize();
+ if (nnfw::cker::IsZeroVector(getBuffer<float>(_bias), bias_size))
{
_bias = nullptr;
}
if (_input->is_dynamic() || !_weights->is_constant())
return;
- const int rows = getTensorShape(_weights).Dims(0);
+ const int rows = getShape(_weights).Dims(0);
if (rows % 4 == 0)
{
// TODO If it's possible to extract precaching from ruy kernel,
{
FusedBatchNormLayer::FusedBatchNormLayer()
- : _inputs(), _output(nullptr), _epsilon(0), _is_training(true),
- _fusedbatchnorm_kernel(new nnfw::cker::FusedBatchNorm())
+ : _inputs(), _output(nullptr), _epsilon(0), _is_training(true),
+ _fusedbatchnorm_kernel(new nnfw::cker::FusedBatchNorm())
{
// DO NOTHING
}
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputShapes.emplace_back(getTensorShape(_inputs[i]));
- inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(_inputs[i]->buffer()));
+ inputShapes.emplace_back(getShape(_inputs[i]));
+ inputFloatPtrs.emplace_back(getBuffer<float>(_inputs[i]));
}
nnfw::cker::FusedBatchNormParams param;
param.is_training = _is_training;
param.data_format = _data_format;
- kernel(inputShapes, inputFloatPtrs, getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()), param);
+ kernel(inputShapes, inputFloatPtrs, getShape(_output), getBuffer<float>(_output), param);
}
void FusedBatchNormLayer::run()
using IndicesType = int32_t;
nnfw::cker::Gather<InputType, IndicesType>(
- op_params, getTensorShape(_input), reinterpret_cast<const InputType *>(_input->buffer()),
- getTensorShape(_indices), reinterpret_cast<const IndicesType *>(_indices->buffer()),
- getTensorShape(_output), reinterpret_cast<OutputType *>(_output->buffer()));
+ op_params, getShape(_input), getBuffer<InputType>(_input), getShape(_indices),
+ getBuffer<IndicesType>(_indices), getShape(_output), getBuffer<OutputType>(_output));
break;
}
case OperandType::INT64:
using IndicesType = int64_t;
nnfw::cker::Gather<InputType, IndicesType>(
- op_params, getTensorShape(_input), reinterpret_cast<const InputType *>(_input->buffer()),
- getTensorShape(_indices), reinterpret_cast<const IndicesType *>(_indices->buffer()),
- getTensorShape(_output), reinterpret_cast<OutputType *>(_output->buffer()));
+ op_params, getShape(_input), getBuffer<InputType>(_input), getShape(_indices),
+ getBuffer<IndicesType>(_indices), getShape(_output), getBuffer<OutputType>(_output));
break;
}
default:
switch (_input->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::L2NormalizeFloat32(
- getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::L2NormalizeFloat32(getShape(_input), getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
break;
case OperandType::QUANT_UINT8_ASYMM:
{
nnfw::cker::L2NormParams params;
- assert(_input->data_offset() == 128);
- params.input_zero_point = _input->data_offset();
- nnfw::cker::L2NormalizeQuant8(
- params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ assert(_input->data_zero_point() == 128);
+ params.input_zero_point = _input->data_zero_point();
+ nnfw::cker::L2NormalizeQuant8(params, getShape(_input), getBuffer<uint8_t>(_input),
+ getShape(_output), getBuffer<uint8_t>(_output));
}
break;
else
{
assert(tensor->total_size() == total_size);
- return reinterpret_cast<T *>(tensor->buffer());
+ return getBuffer<T>(tensor);
}
}
else
memset(buffer, 0, tensor_in->total_size());
}
-}
+} // namespace
void LSTMLayer::LSTMFloat()
{
- assert(_input->num_dimensions() >= 2 && _input->num_dimensions() <= 3);
+ auto in_shape = _input->getShape();
+ assert(in_shape.rank() >= 2 && in_shape.rank() <= 3);
int max_time, n_batch;
- if (_input->num_dimensions() == 3)
+ if (in_shape.rank() == 3)
{
- max_time = (_time_major) ? _input->dimension(0) : _input->dimension(1);
- n_batch = (_time_major) ? _input->dimension(1) : _input->dimension(0);
+ max_time = (_time_major) ? in_shape.dim(0) : in_shape.dim(1);
+ n_batch = (_time_major) ? in_shape.dim(1) : in_shape.dim(0);
}
else
{
max_time = 1;
- n_batch = _input->dimension(0);
+ n_batch = in_shape.dim(0);
}
- const int n_input = _input->dimension(_input->num_dimensions() - 1);
+ const int n_input = in_shape.dim(in_shape.rank() - 1);
const int aux_input_size = 0;
// n_cell and n_output will be the same size when there is no projection.
- const int n_cell = _input_to_output_weights->dimension(0);
- const int n_output = _recurrent_to_output_weights->dimension(1);
+ const int n_cell = _input_to_output_weights->getShape().dim(0);
+ const int n_output = _recurrent_to_output_weights->getShape().dim(1);
// Since we have already checked that the weights are either all present or all absent, we can
// check the existence of only one to get the condition.
float *output_state_buf = getOptionalOutputBuffer<float>(_output_state, &_output_state_vec,
_output_state_in->total_size());
float *cell_state_buf =
- getOptionalOutputBuffer<float>(_cell_state, &_cell_state_vec, _cell_state_in->total_size());
+ getOptionalOutputBuffer<float>(_cell_state, &_cell_state_vec, _cell_state_in->total_size());
initializeStateBuffer(_output_state_in, output_state_buf, _has_output_state_data);
initializeStateBuffer(_cell_state_in, cell_state_buf, _has_cell_state_data);
// Index the scratch buffers pointers to the global scratch buffer.
float *scratch_buffer_buf = getOptionalOutputBuffer<float>(
- _scratch_buffer, &_scratch_vec, n_batch * n_cell * (use_cifg ? 3 : 4) * sizeof(float));
+ _scratch_buffer, &_scratch_vec, n_batch * n_cell * (use_cifg ? 3 : 4) * sizeof(float));
float *input_gate_scratch = nullptr;
float *cell_gate_scratch = nullptr;
float *forget_gate_scratch = nullptr;
auto optional_tensor_ptr = [](const IPortableTensor *tensor) {
// If the tensor is not given or its size is 0, treat it as not given
- return (tensor && tensor->total_size() > 0) ? reinterpret_cast<float *>(tensor->buffer())
- : nullptr;
+ return (tensor && tensor->total_size() > 0) ? getBuffer<float>(tensor) : nullptr;
};
// Optional inputs
- float *input_to_input_weights_ptr = optional_tensor_ptr(_input_to_input_weights);
- float *recurrent_to_input_weights_ptr = optional_tensor_ptr(_recurrent_to_input_weights);
- float *cell_to_input_weights_ptr = optional_tensor_ptr(_cell_to_input_weights);
- float *cell_to_forget_weights_ptr = optional_tensor_ptr(_cell_to_forget_weights);
- float *cell_to_output_weights_ptr = optional_tensor_ptr(_cell_to_output_weights);
- float *input_gate_bias_ptr = optional_tensor_ptr(_input_gate_bias);
- float *projection_weights_ptr = optional_tensor_ptr(_projection_weights);
- float *projection_bias_ptr = optional_tensor_ptr(_projection_bias);
- float *input_layer_norm_coefficients_ptr = optional_tensor_ptr(_input_layer_norm_coefficients);
- float *forget_layer_norm_coefficients_ptr = optional_tensor_ptr(_forget_layer_norm_coefficients);
- float *cell_layer_norm_coefficients_ptr = optional_tensor_ptr(_cell_layer_norm_coefficients);
- float *output_layer_norm_coefficients_ptr = optional_tensor_ptr(_output_layer_norm_coefficients);
+ const float *input_to_input_weights_ptr = optional_tensor_ptr(_input_to_input_weights);
+ const float *recurrent_to_input_weights_ptr = optional_tensor_ptr(_recurrent_to_input_weights);
+ const float *cell_to_input_weights_ptr = optional_tensor_ptr(_cell_to_input_weights);
+ const float *cell_to_forget_weights_ptr = optional_tensor_ptr(_cell_to_forget_weights);
+ const float *cell_to_output_weights_ptr = optional_tensor_ptr(_cell_to_output_weights);
+ const float *input_gate_bias_ptr = optional_tensor_ptr(_input_gate_bias);
+ const float *projection_weights_ptr = optional_tensor_ptr(_projection_weights);
+ const float *projection_bias_ptr = optional_tensor_ptr(_projection_bias);
+ const float *input_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_input_layer_norm_coefficients);
+ const float *forget_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_forget_layer_norm_coefficients);
+ const float *cell_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_cell_layer_norm_coefficients);
+ const float *output_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_output_layer_norm_coefficients);
// Copy out the LSTM-specific params so they can be passed to the function.
nnfw::cker::LSTMParams lstm_params;
lstm_params.cell_clip = _params.cell_threshold;
lstm_params.proj_clip = _params.projection_threshold;
- const int output_batch_leading_dim = _output->dimension(_output->num_dimensions() - 1);
+ auto out_shape = _output->getShape();
+ const int output_batch_leading_dim = out_shape.dim(out_shape.rank() - 1);
if (_time_major)
{
// Loop through the sequence.
// If this is the forward_sequence, step forward, otherwise step
// backwards.
const int t_rel = _forward_sequence ? t : max_time - t - 1;
- const float *input_ptr = reinterpret_cast<float *>(_input->buffer()) + t_rel * input_step;
+ const float *input_ptr = getBuffer<float>(_input) + t_rel * input_step;
const float *aux_input_ptr = nullptr;
if (_aux_input)
{
- aux_input_ptr = reinterpret_cast<float *>(_aux_input->buffer()) + t_rel * input_step;
+ aux_input_ptr = getBuffer<float>(_aux_input) + t_rel * input_step;
}
- float *output_ptr =
- reinterpret_cast<float *>(_output->buffer()) + t_rel * output_step + _output_offset;
+ float *output_ptr = getBuffer<float>(_output) + t_rel * output_step + _output_offset;
LstmStepFloat(
- input_ptr, input_to_input_weights_ptr,
- reinterpret_cast<float *>(_input_to_forget_weights->buffer()),
- reinterpret_cast<float *>(_input_to_cell_weights->buffer()),
- reinterpret_cast<float *>(_input_to_output_weights->buffer()), aux_input_ptr,
- /*aux_input_to_input_weights=*/nullptr,
- /*aux_input_to_forget_weights=*/nullptr,
- /*aux_input_to_cell_weights=*/nullptr,
- /*aux_input_to_output_weights=*/nullptr, recurrent_to_input_weights_ptr,
- reinterpret_cast<float *>(_recurrent_to_forget_weights->buffer()),
- reinterpret_cast<float *>(_recurrent_to_cell_weights->buffer()),
- reinterpret_cast<float *>(_recurrent_to_output_weights->buffer()),
- cell_to_input_weights_ptr, cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
- input_layer_norm_coefficients_ptr, forget_layer_norm_coefficients_ptr,
- cell_layer_norm_coefficients_ptr, output_layer_norm_coefficients_ptr, input_gate_bias_ptr,
- reinterpret_cast<float *>(_forget_gate_bias->buffer()),
- reinterpret_cast<float *>(_cell_gate_bias->buffer()),
- reinterpret_cast<float *>(_output_gate_bias->buffer()), projection_weights_ptr,
- projection_bias_ptr, &lstm_params, n_batch, n_cell, n_input, aux_input_size, n_output,
- output_batch_leading_dim, output_state_buf, cell_state_buf, input_gate_scratch,
- forget_gate_scratch, cell_gate_scratch, output_gate_scratch, output_ptr);
+ input_ptr, input_to_input_weights_ptr, getBuffer<float>(_input_to_forget_weights),
+ getBuffer<float>(_input_to_cell_weights), getBuffer<float>(_input_to_output_weights),
+ aux_input_ptr,
+ /*aux_input_to_input_weights=*/nullptr,
+ /*aux_input_to_forget_weights=*/nullptr,
+ /*aux_input_to_cell_weights=*/nullptr,
+ /*aux_input_to_output_weights=*/nullptr, recurrent_to_input_weights_ptr,
+ getBuffer<float>(_recurrent_to_forget_weights),
+ getBuffer<float>(_recurrent_to_cell_weights),
+ getBuffer<float>(_recurrent_to_output_weights), cell_to_input_weights_ptr,
+ cell_to_forget_weights_ptr, cell_to_output_weights_ptr, input_layer_norm_coefficients_ptr,
+ forget_layer_norm_coefficients_ptr, cell_layer_norm_coefficients_ptr,
+ output_layer_norm_coefficients_ptr, input_gate_bias_ptr,
+ getBuffer<float>(_forget_gate_bias), getBuffer<float>(_cell_gate_bias),
+ getBuffer<float>(_output_gate_bias), projection_weights_ptr, projection_bias_ptr,
+ &lstm_params, n_batch, n_cell, n_input, aux_input_size, n_output, output_batch_leading_dim,
+ output_state_buf, cell_state_buf, input_gate_scratch, forget_gate_scratch,
+ cell_gate_scratch, output_gate_scratch, output_ptr);
}
}
else
// backwards.
const int t_rel = _forward_sequence ? t : max_time - t - 1;
const int time_offset = b * max_time + t_rel;
- const float *input_ptr =
- reinterpret_cast<float *>(_input->buffer()) + time_offset * input_step;
+ const float *input_ptr = getBuffer<float>(_input) + time_offset * input_step;
const float *aux_input_ptr = nullptr;
if (_aux_input)
{
- aux_input_ptr =
- reinterpret_cast<float *>(_aux_input->buffer()) + time_offset * input_step;
+ aux_input_ptr = getBuffer<float>(_aux_input) + time_offset * input_step;
}
- float *output_ptr = reinterpret_cast<float *>(_output->buffer()) +
- time_offset * output_step + _output_offset;
+ float *output_ptr = getBuffer<float>(_output) + time_offset * output_step + _output_offset;
// Offset the {output,cell}_state pointers to the right batch.
float *output_state_ptr = output_state_buf + b * output_batch_leading_dim;
float *cell_state_ptr = cell_state_buf + b * n_cell;
// Offset the scratch pointers to the right batch.
float *input_gate_scratch_ptr =
- input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
+ input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
float *forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell;
float *cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell;
float *output_gate_scratch_ptr = output_gate_scratch + b * n_cell;
LstmStepFloat(
- input_ptr, input_to_input_weights_ptr,
- reinterpret_cast<float *>(_input_to_forget_weights->buffer()),
- reinterpret_cast<float *>(_input_to_cell_weights->buffer()),
- reinterpret_cast<float *>(_input_to_output_weights->buffer()), aux_input_ptr,
- /*aux_input_to_input_weights=*/nullptr,
- /*aux_input_to_forget_weights=*/nullptr,
- /*aux_input_to_cell_weights=*/nullptr,
- /*aux_input_to_output_weights=*/nullptr, recurrent_to_input_weights_ptr,
- reinterpret_cast<float *>(_recurrent_to_forget_weights->buffer()),
- reinterpret_cast<float *>(_recurrent_to_cell_weights->buffer()),
- reinterpret_cast<float *>(_recurrent_to_output_weights->buffer()),
- cell_to_input_weights_ptr, cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
- input_layer_norm_coefficients_ptr, forget_layer_norm_coefficients_ptr,
- cell_layer_norm_coefficients_ptr, output_layer_norm_coefficients_ptr,
- input_gate_bias_ptr, reinterpret_cast<float *>(_forget_gate_bias->buffer()),
- reinterpret_cast<float *>(_cell_gate_bias->buffer()),
- reinterpret_cast<float *>(_output_gate_bias->buffer()), projection_weights_ptr,
- projection_bias_ptr, &lstm_params, /*n_batch=*/1, n_cell, n_input, aux_input_size,
- n_output, output_batch_leading_dim, output_state_ptr, cell_state_ptr,
- input_gate_scratch_ptr, forget_gate_scratch_ptr, cell_gate_scratch_ptr,
- output_gate_scratch_ptr, output_ptr);
+ input_ptr, input_to_input_weights_ptr, getBuffer<float>(_input_to_forget_weights),
+ getBuffer<float>(_input_to_cell_weights), getBuffer<float>(_input_to_output_weights),
+ aux_input_ptr,
+ /*aux_input_to_input_weights=*/nullptr,
+ /*aux_input_to_forget_weights=*/nullptr,
+ /*aux_input_to_cell_weights=*/nullptr,
+ /*aux_input_to_output_weights=*/nullptr, recurrent_to_input_weights_ptr,
+ getBuffer<float>(_recurrent_to_forget_weights),
+ getBuffer<float>(_recurrent_to_cell_weights),
+ getBuffer<float>(_recurrent_to_output_weights), cell_to_input_weights_ptr,
+ cell_to_forget_weights_ptr, cell_to_output_weights_ptr, input_layer_norm_coefficients_ptr,
+ forget_layer_norm_coefficients_ptr, cell_layer_norm_coefficients_ptr,
+ output_layer_norm_coefficients_ptr, input_gate_bias_ptr,
+ getBuffer<float>(_forget_gate_bias), getBuffer<float>(_cell_gate_bias),
+ getBuffer<float>(_output_gate_bias), projection_weights_ptr, projection_bias_ptr,
+ &lstm_params, /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output,
+ output_batch_leading_dim, output_state_ptr, cell_state_ptr, input_gate_scratch_ptr,
+ forget_gate_scratch_ptr, cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr);
}
}
}
}
void LSTMLayer::configure(
- const IPortableTensor *input, const IPortableTensor *input_to_input_weights,
- const IPortableTensor *input_to_forget_weights, const IPortableTensor *input_to_cell_weights,
- const IPortableTensor *input_to_output_weights,
- const IPortableTensor *recurrent_to_input_weights,
- const IPortableTensor *recurrent_to_forget_weights,
- const IPortableTensor *recurrent_to_cell_weights,
- const IPortableTensor *recurrent_to_output_weights,
- const IPortableTensor *cell_to_input_weights, const IPortableTensor *cell_to_forget_weights,
- const IPortableTensor *cell_to_output_weights, const IPortableTensor *input_layer_norm_weights,
- const IPortableTensor *forget_layer_norm_weights,
- const IPortableTensor *cell_layer_norm_weights,
- const IPortableTensor *output_layer_norm_weights, const IPortableTensor *aux_input,
- const IPortableTensor *aux_input_to_input_weights,
- const IPortableTensor *aux_input_to_forget_weights,
- const IPortableTensor *aux_input_to_cell_weights,
- const IPortableTensor *aux_input_to_output_weights, const IPortableTensor *input_gate_bias,
- const IPortableTensor *forget_gate_bias, const IPortableTensor *cell_gate_bias,
- const IPortableTensor *output_gate_bias, const IPortableTensor *projection_weights,
- const IPortableTensor *projection_bias, const IPortableTensor *output_state_in,
- const IPortableTensor *cell_state_in, const ir::operation::LSTM::Param ¶ms,
- bool forward_sequence, bool time_major, int output_offset, IPortableTensor *scratch_buffer,
- IPortableTensor *output_state, IPortableTensor *cell_state, IPortableTensor *output,
- bool has_output_state_data, bool has_cell_state_data)
+ const IPortableTensor *input, const IPortableTensor *input_to_input_weights,
+ const IPortableTensor *input_to_forget_weights, const IPortableTensor *input_to_cell_weights,
+ const IPortableTensor *input_to_output_weights, const IPortableTensor *recurrent_to_input_weights,
+ const IPortableTensor *recurrent_to_forget_weights,
+ const IPortableTensor *recurrent_to_cell_weights,
+ const IPortableTensor *recurrent_to_output_weights, const IPortableTensor *cell_to_input_weights,
+ const IPortableTensor *cell_to_forget_weights, const IPortableTensor *cell_to_output_weights,
+ const IPortableTensor *input_layer_norm_weights, const IPortableTensor *forget_layer_norm_weights,
+ const IPortableTensor *cell_layer_norm_weights, const IPortableTensor *output_layer_norm_weights,
+ const IPortableTensor *aux_input, const IPortableTensor *aux_input_to_input_weights,
+ const IPortableTensor *aux_input_to_forget_weights,
+ const IPortableTensor *aux_input_to_cell_weights,
+ const IPortableTensor *aux_input_to_output_weights, const IPortableTensor *input_gate_bias,
+ const IPortableTensor *forget_gate_bias, const IPortableTensor *cell_gate_bias,
+ const IPortableTensor *output_gate_bias, const IPortableTensor *projection_weights,
+ const IPortableTensor *projection_bias, const IPortableTensor *output_state_in,
+ const IPortableTensor *cell_state_in, const ir::operation::LSTM::Param ¶ms,
+ bool forward_sequence, bool time_major, int output_offset, IPortableTensor *scratch_buffer,
+ IPortableTensor *output_state, IPortableTensor *cell_state, IPortableTensor *output,
+ bool has_output_state_data, bool has_cell_state_data)
{
_input = input;
_input_to_input_weights = input_to_input_weights;
public:
void LSTMFloat();
- void configure(const IPortableTensor *input, const IPortableTensor *input_to_input_weights,
- const IPortableTensor *input_to_forget_weights,
- const IPortableTensor *input_to_cell_weights,
- const IPortableTensor *input_to_output_weights,
- const IPortableTensor *recurrent_to_input_weights,
- const IPortableTensor *recurrent_to_forget_weights,
- const IPortableTensor *recurrent_to_cell_weights,
- const IPortableTensor *recurrent_to_output_weights,
- const IPortableTensor *cell_to_input_weights,
- const IPortableTensor *cell_to_forget_weights,
- const IPortableTensor *cell_to_output_weights,
- const IPortableTensor *input_layer_norm_weights,
- const IPortableTensor *forget_layer_norm_weights,
- const IPortableTensor *cell_layer_norm_weights,
- const IPortableTensor *output_layer_norm_weights, const IPortableTensor *aux_input,
- const IPortableTensor *aux_input_to_input_weights,
- const IPortableTensor *aux_input_to_forget_weights,
- const IPortableTensor *aux_input_to_cell_weights,
- const IPortableTensor *aux_input_to_output_weights,
- const IPortableTensor *input_gate_bias, const IPortableTensor *forget_gate_bias,
- const IPortableTensor *cell_gate_bias, const IPortableTensor *output_gate_bias,
- const IPortableTensor *projection_weights, const IPortableTensor *projection_bias,
- const IPortableTensor *output_state_in, const IPortableTensor *cell_state_in,
- const ir::operation::LSTM::Param ¶ms, bool forward_sequence, bool time_major,
- int32_t output_offset, IPortableTensor *scratch_buffer,
- IPortableTensor *output_state, IPortableTensor *cell_state,
- IPortableTensor *output, bool has_output_state_data, bool has_cell_state_data);
+ void configure(
+ const IPortableTensor *input, const IPortableTensor *input_to_input_weights,
+ const IPortableTensor *input_to_forget_weights, const IPortableTensor *input_to_cell_weights,
+ const IPortableTensor *input_to_output_weights,
+ const IPortableTensor *recurrent_to_input_weights,
+ const IPortableTensor *recurrent_to_forget_weights,
+ const IPortableTensor *recurrent_to_cell_weights,
+ const IPortableTensor *recurrent_to_output_weights,
+ const IPortableTensor *cell_to_input_weights, const IPortableTensor *cell_to_forget_weights,
+ const IPortableTensor *cell_to_output_weights, const IPortableTensor *input_layer_norm_weights,
+ const IPortableTensor *forget_layer_norm_weights,
+ const IPortableTensor *cell_layer_norm_weights,
+ const IPortableTensor *output_layer_norm_weights, const IPortableTensor *aux_input,
+ const IPortableTensor *aux_input_to_input_weights,
+ const IPortableTensor *aux_input_to_forget_weights,
+ const IPortableTensor *aux_input_to_cell_weights,
+ const IPortableTensor *aux_input_to_output_weights, const IPortableTensor *input_gate_bias,
+ const IPortableTensor *forget_gate_bias, const IPortableTensor *cell_gate_bias,
+ const IPortableTensor *output_gate_bias, const IPortableTensor *projection_weights,
+ const IPortableTensor *projection_bias, const IPortableTensor *output_state_in,
+ const IPortableTensor *cell_state_in, const ir::operation::LSTM::Param ¶ms,
+ bool forward_sequence, bool time_major, int32_t output_offset, IPortableTensor *scratch_buffer,
+ IPortableTensor *output_state, IPortableTensor *cell_state, IPortableTensor *output,
+ bool has_output_state_data, bool has_cell_state_data);
void run() override;
nnfw::cker::SoftmaxParams op_params;
op_params.beta = _beta;
op_params.axis = _axis;
- nnfw::cker::LogSoftmax(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::LogSoftmax(op_params, getShape(_input), getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
}
void LogSoftMaxLayer::logsoftmaxQuant8()
op_params.beta = _beta;
op_params.axis = _axis;
op_params.table = _table;
- op_params.zero_point = _output->data_offset();
+ op_params.zero_point = _output->data_zero_point();
op_params.scale = _output->data_scale();
- nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getShape(_input),
+ getBuffer<uint8_t>(_input), getShape(_output),
+ getBuffer<uint8_t>(_output));
}
void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
{
MatrixBandPartLayer::MatrixBandPartLayer()
- : _input(nullptr), _num_lower_diag(nullptr), _num_upper_diag(nullptr), _output(nullptr)
+ : _input(nullptr), _num_lower_diag(nullptr), _num_upper_diag(nullptr), _output(nullptr)
{
// DO NOTHING
}
if (_num_lower_diag->data_type() == OperandType::INT64)
{
nnfw::cker::MatrixBandPart<int64_t>(
- *reinterpret_cast<const int64_t *>(_num_lower_diag->buffer()),
- *reinterpret_cast<const int64_t *>(_num_upper_diag->buffer()), getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ *getBuffer<int64_t>(_num_lower_diag), *getBuffer<int64_t>(_num_upper_diag), getShape(_input),
+ getBuffer<float>(_input), getShape(_output), getBuffer<float>(_output));
}
else
{
nnfw::cker::MatrixBandPart<int32_t>(
- *reinterpret_cast<const int32_t *>(_num_lower_diag->buffer()),
- *reinterpret_cast<const int32_t *>(_num_upper_diag->buffer()), getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ *getBuffer<int32_t>(_num_lower_diag), *getBuffer<int32_t>(_num_upper_diag), getShape(_input),
+ getBuffer<float>(_input), getShape(_output), getBuffer<float>(_output));
}
}
void MeanLayer::MeanFloat32()
{
- const auto inputShape = getTensorShape(_input);
+ const auto inputShape = getShape(_input);
const auto axisVec = getReducerAxes(_axes);
bool axis_is_1_and_2 =
- _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 &&
- ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1));
+ _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 &&
+ ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1));
if (axis_is_1_and_2)
{
- nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::MeanAxis1And2(inputShape, getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
}
else
{
- nnfw::cker::Mean(inputShape, reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- axisVec);
+ nnfw::cker::Mean(inputShape, getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output), axisVec);
}
}
void MeanLayer::MeanQuant8()
{
- nnfw::cker::MeanQ8Asymm(getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()), _input->data_scale(),
- _input->data_offset(), getTensorShape(_output),
- reinterpret_cast<uint8_t *>(_output->buffer()), _output->data_scale(),
- _output->data_offset(), getReducerAxes(_axes));
+ nnfw::cker::MeanQ8Asymm(getShape(_input), getBuffer<uint8_t>(_input), _input->data_scale(),
+ _input->data_zero_point(), getShape(_output), getBuffer<uint8_t>(_output),
+ _output->data_scale(), _output->data_zero_point(), getReducerAxes(_axes));
}
void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *axes,
{
// It assumes the index type is int32_t.
nnfw::cker::OneHot<T, int32_t>(
- *reinterpret_cast<const int32_t *>(_depth->buffer()),
- *reinterpret_cast<T *>(_on_value->buffer()), *reinterpret_cast<T *>(_off_value->buffer()),
- _axis, getTensorShape(_indices), reinterpret_cast<const int32_t *>(_indices->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ *getBuffer<int32_t>(_depth), *getBuffer<T>(_on_value), *getBuffer<T>(_off_value), _axis,
+ getShape(_indices), getBuffer<int32_t>(_indices), getShape(_output), getBuffer<T>(_output));
}
void OneHotLayer::configure(const IPortableTensor *indices, const IPortableTensor *depth,
{
public:
OneHotLayer()
- : _indices(nullptr), _depth(nullptr), _on_value(nullptr), _off_value(nullptr),
- _output(nullptr), _axis(-1)
+ : _indices(nullptr), _depth(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr),
+ _axis(-1)
{
// DO NOTHING
}
uint32_t getNumberOfDimensions(const IPortableTensor *tensor)
{
assert(tensor);
- return tensor->num_dimensions();
+ return tensor->getShape().rank();
}
uint32_t getNumberOfElements(const IPortableTensor *tensor)
{
assert(tensor);
uint32_t count = 1;
- for (size_t i = 0; i < tensor->num_dimensions(); i++)
+ auto shape = tensor->getShape();
+ for (int i = 0; i < shape.rank(); i++)
{
- count *= tensor->dimension(i);
+ count *= shape.dim(i);
}
return count;
}
uint32_t getSizeOfDimension(const IPortableTensor *tensor, uint32_t dimensionIdx)
{
assert(tensor);
- if (dimensionIdx >= tensor->num_dimensions())
+ auto shape = tensor->getShape();
+ if (dimensionIdx >= static_cast<uint32_t>(shape.rank()))
{
// TODO Log the error
return 0;
}
- return tensor->dimension(dimensionIdx);
+ return shape.dim(dimensionIdx);
}
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
*multiplier = input_product_scale / output_scale;
}
+void GetQuantizedConvolutionMultipliersAndShifts(
+ float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size,
+ int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
+ std::vector<int> &per_channel_output_shift)
+{
+ // Originates from tflite's PopulateConvolutionQuantizationParams()
+ per_channel_output_multiplier.resize(num_channels);
+ per_channel_output_shift.resize(num_channels);
+
+ const bool is_per_channel = filter_scales_size > 1;
+ auto per_channel_multiplier = per_channel_output_multiplier.data();
+ auto per_channel_shift = per_channel_output_shift.data();
+ for (int i = 0; i < num_channels; ++i)
+ {
+ // If a per-tensor quantization parameter is specified, broadcast it along the
+ // quantization dimension (channels_out).
+ const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
+ const double filter_scale = static_cast<double>(scale);
+ const double effective_output_scale =
+ static_cast<double>(input_scale) * filter_scale / static_cast<double>(output_scale);
+ int32_t significand;
+ int channel_shift;
+ QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
+ per_channel_multiplier[i] = significand;
+ per_channel_shift[i] = channel_shift;
+ }
+}
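+
+// Illustrative note: for each output channel the effective scale is
+//   effective_output_scale = input_scale * filter_scale / output_scale
+// and QuantizeMultiplier() decomposes it into a fixed-point significand and a power-of-two
+// shift (tflite convention), i.e. effective_output_scale ~= (significand / 2^31) * 2^shift.
+// For example (hypothetical values), input_scale = 0.5, filter_scale = 0.25, output_scale = 1.0
+// gives 0.125, stored as significand = 2^30 and shift = -2, since (2^30 / 2^31) * 2^-2 = 0.125.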
+
void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
int *left_shift)
{
*quantized_multiplier = static_cast<int32_t>(q_fixed);
}
-void CalculateActivationRangeUint8(ir::Activation activation, const IPortableTensor *output,
- int32_t *act_min, int32_t *act_max)
+void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
+ int32_t *act_min, int32_t *act_max)
{
- const int32_t qmin = std::numeric_limits<uint8_t>::min();
- const int32_t qmax = std::numeric_limits<uint8_t>::max();
+ int32_t qmin = 0;
+ int32_t qmax = 0;
+
+ switch (output->data_type())
+ {
+ case OperandType::QUANT_UINT8_ASYMM:
+ qmin = std::numeric_limits<uint8_t>::min();
+ qmax = std::numeric_limits<uint8_t>::max();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ case OperandType::QUANT_INT8_SYMM:
+ qmin = std::numeric_limits<int8_t>::min();
+ qmax = std::numeric_limits<int8_t>::max();
+ break;
+ default:
+ throw std::runtime_error("CalculateActivationRangeQuantized: Not supported operand type.");
+ }
+
const auto scale = output->data_scale();
- const auto zero_point = output->data_offset();
+ const auto zero_point = output->data_zero_point();
auto quantize = [scale, zero_point](float f) {
return zero_point + static_cast<int32_t>(std::round(f / scale));
};
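+ // Worked example (hypothetical numbers, for illustration only): with scale = 0.5 and
+ // zero_point = 10, quantize(0.0f) == 10 and quantize(6.0f) == 22, so a fused RELU6 would
+ // clamp the quantized activation range to [10, 22] within [qmin, qmax].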
if (getNumberOfDimensions(input1) != getNumberOfDimensions(input2))
return false;
+ auto shape1 = input1->getShape();
+ auto shape2 = input2->getShape();
for (uint32_t i = 0; i < getNumberOfDimensions(input1); i++)
- if (input1->dimension(i) != input2->dimension(i))
+ if (shape1.dim(i) != shape2.dim(i))
return false;
return true;
{
std::vector<int32_t> ret;
+ auto axes_vals = (axes->getShape().rank() == 0) ? 1 : axes->getShape().dim(0);
assert(axes->layout() == ir::Layout::NHWC);
- assert(axes->dimension(0) == axes->getShape().num_elements());
+ assert(static_cast<size_t>(axes_vals) == axes->getShape().num_elements());
switch (axes->data_type())
{
case ir::DataType::INT32:
{
- for (size_t i = 0; i < axes->dimension(0); ++i)
- ret.emplace_back(*(reinterpret_cast<const int32_t *>(axes->buffer()) + i));
+ for (int i = 0; i < axes_vals; ++i)
+ ret.emplace_back(*(getBuffer<int32_t>(axes) + i));
break;
}
case ir::DataType::INT64:
{
- for (size_t i = 0; i < axes->dimension(0); ++i)
- ret.emplace_back(*(reinterpret_cast<const int64_t *>(axes->buffer()) + i));
+ for (int i = 0; i < axes_vals; ++i)
+ ret.emplace_back(*(getBuffer<int64_t>(axes) + i));
break;
}
default:
assert(tensor);
const int32_t extended_rank = 4;
int32_t raw_shape[extended_rank];
- uint32_t src = extended_rank - tensor->num_dimensions();
+ auto shape = tensor->getShape();
+ uint32_t src = extended_rank - shape.rank();
for (uint32_t i = 0; i < extended_rank; ++i)
{
if (i < src)
}
else
{
- raw_shape[i] = tensor->dimension(i - src);
+ raw_shape[i] = shape.dim(i - src);
}
}
return nnfw::cker::Shape(extended_rank, raw_shape);
}
-inline nnfw::cker::Shape getTensorShape(const IPortableTensor *tensor)
+inline nnfw::cker::Shape getShape(const IPortableTensor *tensor)
{
if (tensor == nullptr)
return nnfw::cker::Shape();
void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
int *left_shift);
+void GetQuantizedConvolutionMultipliersAndShifts(
+ float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size,
+ int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
+ std::vector<int> &per_channel_output_shift);
+
template <typename T>
void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
{
}
}
-void CalculateActivationRangeUint8(ir::Activation activation, const IPortableTensor *output,
- int32_t *act_min, int32_t *act_max);
+void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
+ int32_t *act_min, int32_t *act_max);
bool HaveSameShapes(const IPortableTensor *input1, const IPortableTensor *input2);
std::vector<int32_t> getReducerAxes(const IPortableTensor *axes);
+template <typename T> const T *getBuffer(const IPortableTensor *tensor)
+{
+ return reinterpret_cast<const T *>(tensor->buffer());
+}
+
+template <typename T> T *getBuffer(IPortableTensor *tensor)
+{
+ return reinterpret_cast<T *>(tensor->buffer());
+}
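+
+// Usage sketch (illustration only): these helpers replace the reinterpret_cast calls on
+// tensor->buffer() used above; const-ness follows the tensor pointer:
+//   const float *in = getBuffer<float>(_input);   // const IPortableTensor* -> const T*
+//   float *out = getBuffer<float>(_output);       // IPortableTensor*       -> T*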
+
} // namespace ops
} // namespace cpu
} // namespace backend
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims.push_back(getTensorShape(_inputs[i]));
+ inputDims.push_back(getShape(_inputs[i]));
inputDimsPtr.push_back(&inputDims[i]);
}
for (const auto input : _inputs)
{
- inputPtrs.emplace_back(reinterpret_cast<const T *>(input->buffer()));
+ inputPtrs.emplace_back(getBuffer<T>(input));
}
- nnfw::cker::Pack<T>(op_params, inputPtrs.data(), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::Pack<T>(op_params, inputPtrs.data(), getShape(_output), getBuffer<T>(_output));
}
void PackLayer::configure(const std::vector<const IPortableTensor *> &inputs, int32_t axis,
{
PadLayer::PadLayer()
- : _input(nullptr), _output(nullptr), _padData(), _padRank(), _constantValueData()
+ : _input(nullptr), _output(nullptr), _padData(), _padRank(), _constantValueData()
{
// DO NOTHING
}
template <typename T> void PadLayer::padImpl(const T *constant_value_data)
{
- nnfw::cker::Pad<T>(_padData, _padRank, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()), constant_value_data);
+ nnfw::cker::Pad<T>(_padData, _padRank, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output), constant_value_data);
}
void PadLayer::configure(const IPortableTensor *input, IPortableTensor *output,
void PadLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
+ switch (_input->data_type())
{
- padImpl<float>(_constantValueData.f);
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- if (_constantValueData.u8 == nullptr)
- {
- uint8_t pad_value = static_cast<uint8_t>(_output->data_offset());
- padImpl<uint8_t>(&pad_value);
- }
- else
- {
- padImpl<uint8_t>(_constantValueData.u8);
- }
- }
- else
- {
- throw std::runtime_error{"Pad: unsupported data type"};
+ case OperandType::FLOAT32:
+ padImpl<float>(_constantValueData.f);
+ break;
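+ // For the quantized cases below, when no constant value is given we pad with the output
+ // zero point, i.e. the quantized representation of 0.0f.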
+ case OperandType::QUANT_UINT8_ASYMM:
+ if (_constantValueData.u8 == nullptr)
+ {
+ uint8_t pad_value = static_cast<uint8_t>(_output->data_zero_point());
+ padImpl<uint8_t>(&pad_value);
+ }
+ else
+ {
+ padImpl<uint8_t>(_constantValueData.u8);
+ }
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ if (_constantValueData.i8 == nullptr)
+ {
+ int8_t pad_value = static_cast<int8_t>(_output->data_zero_point());
+ padImpl<int8_t>(&pad_value);
+ }
+ else
+ {
+ padImpl<int8_t>(_constantValueData.i8);
+ }
+ break;
+ default:
+ throw std::runtime_error{"Pad: unsupported data type"};
}
}
void avgPool2D(const nnfw::cker::PoolParams ¶ms, const IPortableTensor *input,
IPortableTensor *output)
{
- nnfw::cker::AveragePool<T>(params, getTensorShape(input),
- reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::AveragePool<T>(params, getShape(input), getBuffer<T>(input), getShape(output),
+ getBuffer<T>(output));
}
template <typename T>
void maxPool2D(const nnfw::cker::PoolParams ¶ms, const IPortableTensor *input,
IPortableTensor *output)
{
- nnfw::cker::MaxPool<T>(params, getTensorShape(input),
- reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::MaxPool<T>(params, getShape(input), getBuffer<T>(input), getShape(output),
+ getBuffer<T>(output));
}
template <typename T>
_output = output;
POOLING_PARAMETERS
- if (_input->data_type() == OperandType::FLOAT32)
- {
- float output_activation_min = 0;
- float output_activation_max = 0;
- CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
- _kernel = generateKernelGeneric<float>(op_params, op_type);
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
- _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
- }
- else
+ switch (_input->data_type())
{
- throw std::runtime_error{"Pool: unsupported data type"};
+ case OperandType::FLOAT32:
+ {
+ float output_activation_min = 0;
+ float output_activation_max = 0;
+ CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ _kernel = generateKernelGeneric<float>(op_params, op_type);
+ break;
+ }
+ case OperandType::QUANT_UINT8_ASYMM:
+ {
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
+ break;
+ }
+ case OperandType::QUANT_INT8_ASYMM:
+ {
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ _kernel = generateKernelGeneric<int8_t>(op_params, op_type);
+ break;
+ }
+ default:
+ throw std::runtime_error{"Pool: unsupported data type"};
}
}
if (!HaveSameShapes(_lhs, _rhs))
{
nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::POW>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ op_params, getShape(_lhs), getBuffer<float>(_lhs), getShape(_rhs), getBuffer<float>(_rhs),
+ getShape(_output), getBuffer<float>(_output));
return;
}
- nnfw::cker::powImpl(getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::powImpl(getShape(_lhs), getBuffer<float>(_lhs), getShape(_rhs),
+ getBuffer<float>(_rhs), getShape(_output), getBuffer<float>(_output));
}
void PowLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+#include "QuantizeLayer.h"
+
+#include <cker/operation/Dequantize.h>
+#include <cker/operation/Erf.h>
+#include <cker/operation/Exp.h>
+#include <cker/operation/LogicalNot.h>
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Round.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+template <typename InputT, typename OutputT>
+void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Quantize(getShape(input), getBuffer<InputT>(input), getShape(output),
+ getBuffer<OutputT>(output), output->data_scale(), output->data_zero_point());
+}
+
+void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ if ((_input->data_type() == OperandType::FLOAT32))
+ {
+ // DO NOTHING
+ }
+ else if (((input->data_type() == OperandType::QUANT_UINT8_ASYMM) &&
+ (output->data_type() == OperandType::QUANT_INT8_ASYMM)) ||
+ ((input->data_type() == OperandType::QUANT_INT8_ASYMM) &&
+ (output->data_type() == OperandType::QUANT_UINT8_ASYMM)))
+ {
+ const double effective_output_scale =
+ static_cast<double>(input->data_scale()) / static_cast<double>(output->data_scale());
+ QuantizeMultiplier(effective_output_scale, &_output_multiplier, &_output_shift);
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: Unsupported data type"};
+ }
+}
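+
+// Note (illustration, following the tflite-style requantization implemented by cker::Requantize):
+// run() maps each element roughly as
+//   out = output_zero_point + (in - input_zero_point) * (input_scale / output_scale)
+// where input_scale / output_scale is the effective_output_scale decomposed above into the
+// fixed-point (_output_multiplier, _output_shift) pair.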
+
+void QuantizeLayer::run()
+{
+ if ((_input->data_type() == OperandType::FLOAT32))
+ {
+ affineQuantize<float, uint8_t>(_input, _output);
+ }
+ else if ((_input->data_type() == OperandType::QUANT_UINT8_ASYMM) &&
+ (_output->data_type() == OperandType::QUANT_INT8_ASYMM))
+ {
+ nnfw::cker::Requantize<uint8_t, int8_t>(
+ getBuffer<uint8_t>(_input), MatchingFlatSize(getShape(_input), getShape(_output)),
+ _output_multiplier, _output_shift, _input->data_zero_point(), _output->data_zero_point(),
+ getBuffer<int8_t>(_output));
+ }
+ else if ((_input->data_type() == OperandType::QUANT_INT8_ASYMM) &&
+ (_output->data_type() == OperandType::QUANT_UINT8_ASYMM))
+ {
+ nnfw::cker::Requantize<int8_t, uint8_t>(
+ getBuffer<int8_t>(_input), MatchingFlatSize(getShape(_input), getShape(_output)),
+ _output_multiplier, _output_shift, _input->data_zero_point(), _output->data_zero_point(),
+ getBuffer<uint8_t>(_output));
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: Unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class QuantizeLayer : public ::onert::exec::IFunction
+{
+public:
+ QuantizeLayer() : _input(nullptr), _output(nullptr), _output_multiplier(0), _output_shift(0)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+ int32_t _output_multiplier;
+ int _output_shift;
+};
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
switch (_output->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Range<float>(reinterpret_cast<float *>(_start->buffer()),
- reinterpret_cast<float *>(_limit->buffer()),
- reinterpret_cast<float *>(_delta->buffer()),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Range<float>(getBuffer<float>(_start), getBuffer<float>(_limit),
+ getBuffer<float>(_delta), getBuffer<float>(_output));
break;
case OperandType::INT32:
- nnfw::cker::Range<int32_t>(reinterpret_cast<int32_t *>(_start->buffer()),
- reinterpret_cast<int32_t *>(_limit->buffer()),
- reinterpret_cast<int32_t *>(_delta->buffer()),
- reinterpret_cast<int32_t *>(_output->buffer()));
+ nnfw::cker::Range<int32_t>(getBuffer<int32_t>(_start), getBuffer<int32_t>(_limit),
+ getBuffer<int32_t>(_delta), getBuffer<int32_t>(_output));
break;
default:
throw std::runtime_error{"Range: unsupported data type"};
void RankLayer::run()
{
- int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer());
- output_data[0] = _input->num_dimensions();
+ int32_t *output_data = getBuffer<int32_t>(_output);
+ output_data[0] = _input->getShape().rank();
}
} // namespace ops
bool keep_dims, T init_value, nnfw::cker::Reduce &reduce_kernel,
T reducer(const T current, const T in))
{
- reduce_kernel.prepare(input->num_dimensions(), axes.size());
- bool result = reduce_kernel.ReduceGeneric<T>(
- getTensorShape(input), reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()), axes, keep_dims, init_value, reducer);
+ reduce_kernel.prepare(input->getShape().rank(), axes.size());
+ bool result =
+ reduce_kernel.ReduceGeneric<T>(getShape(input), getBuffer<T>(input), getShape(output),
+ getBuffer<T>(output), axes, keep_dims, init_value, reducer);
if (!result)
{
break;
case ReduceType::kMax:
return std::bind(
- &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
- keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
- [](const T current, const T in) -> T { return (in > current) ? in : current; });
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
+ [](const T current, const T in) -> T { return (in > current) ? in : current; });
break;
case ReduceType::kMin:
return std::bind(
- &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
- keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
- [](const T current, const T in) -> T { return (in < current) ? in : current; });
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
+ [](const T current, const T in) -> T { return (in < current) ? in : current; });
break;
default:
throw std::runtime_error{"Reduce: Unsupported reduce type"};
nnfw::cker::Reduce &reduce_kernel)
{
const bool same_scale = (input->data_scale() == output->data_scale() &&
- input->data_offset() == output->data_offset());
+ input->data_zero_point() == output->data_zero_point());
- reduce_kernel.prepare(input->num_dimensions(), axes.size());
+ reduce_kernel.prepare(input->getShape().rank(), axes.size());
if (!same_scale)
{
std::vector<int32_t> temp_sum(output->getShape().num_elements());
bool result = reduce_kernel.QuantizedMeanOrSum<uint8_t, int32_t>(
- reinterpret_cast<const uint8_t *>(input->buffer()), input->data_offset(),
- input->data_scale(), getTensorShape(input), reinterpret_cast<uint8_t *>(output->buffer()),
- output->data_offset(), output->data_scale(), getTensorShape(output), axes, keep_dims,
- temp_sum.data(), true, [](const int32_t current, const uint8_t in) -> int32_t {
- const int32_t actual_in = static_cast<int32_t>(in);
- return current + actual_in;
- });
+ getBuffer<uint8_t>(input), input->data_zero_point(), input->data_scale(), getShape(input),
+ getBuffer<uint8_t>(output), output->data_zero_point(), output->data_scale(), getShape(output),
+ axes, keep_dims, temp_sum.data(), true,
+ [](const int32_t current, const uint8_t in) -> int32_t {
+ const int32_t actual_in = static_cast<int32_t>(in);
+ return current + actual_in;
+ });
if (!result)
{
} // namespace
ReduceLayer::ReduceLayer()
- : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
- _kernel(), _reduceType(ReduceType::kInvalid)
+ : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
+ _kernel(), _reduceType(ReduceType::kInvalid)
{
// DO NOTHING
}
{
const auto axes = getReducerAxes(_axes);
#ifdef USE_NEON
- int32_t rank = _input->num_dimensions();
+ int32_t rank = _input->getShape().rank();
if (_input->data_type() == ir::DataType::FLOAT32 && _reduceType == ReduceType::kSum &&
axes.size() == 1 && (axes[0] == -1 || axes[0] == rank - 1))
{
- OptimizedReduceSum(reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_input),
- reinterpret_cast<float *>(_output->buffer()));
+ OptimizedReduceSum(getBuffer<float>(_input), getShape(_input), getBuffer<float>(_output));
return;
}
#endif // NEON
std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel;
std::function<void(const IPortableTensor *input, IPortableTensor *output,
const std::vector<int> &axes)>
- _kernel;
+ _kernel;
ReduceType _reduceType;
};
{
ResizeBilinearLayer::ResizeBilinearLayer()
- : _input(nullptr), _output(nullptr), _size(nullptr), _output_height(0), _output_width(0),
- _align_corners(false), _half_pixel_centers(false)
+ : _input(nullptr), _output(nullptr), _size(nullptr), _output_height(0), _output_width(0),
+ _align_corners(false), _half_pixel_centers(false)
{
// DO NOTHING
}
}
else
{
- const auto size_buf = reinterpret_cast<const int32_t *>(_size->buffer());
+ const auto size_buf = getBuffer<int32_t>(_size);
params.output_height = size_buf[0];
params.output_width = size_buf[1];
}
switch (_input->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::ResizeBilinear(
- params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::ResizeBilinear(params, getShape(_input), getBuffer<float>(_input),
+ getShape(_output), getBuffer<float>(_output));
break;
case OperandType::QUANT_UINT8_ASYMM:
- nnfw::cker::ResizeBilinear(
- params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::ResizeBilinear(params, getShape(_input), getBuffer<uint8_t>(_input),
+ getShape(_output), getBuffer<uint8_t>(_output));
+ break;
+
+ case OperandType::QUANT_INT8_ASYMM:
+ nnfw::cker::ResizeBilinear(params, getShape(_input), getBuffer<int8_t>(_input),
+ getShape(_output), getBuffer<int8_t>(_output));
break;
case OperandType::UINT8:
{
throw std::runtime_error{"Reverse: only support 1 axis"};
}
- int32_t axis = *(reinterpret_cast<int32_t *>(_axis->buffer()));
+ int32_t axis = *getBuffer<int32_t>(_axis);
if (axis < 0)
{
- axis += _input->num_dimensions();
+ axis += _input->getShape().rank();
}
switch (_input->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Reverse<float>(
- axis, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Reverse<float>(axis, getShape(_input), getBuffer<float>(_input),
+ getShape(_output), getBuffer<float>(_output));
break;
default:
throw std::runtime_error{"Reverse: unsupported data type"};
{
SelectLayer::SelectLayer()
- : _cond(nullptr), _input_true(nullptr), _input_false(nullptr), _output(nullptr)
+ : _cond(nullptr), _input_true(nullptr), _input_false(nullptr), _output(nullptr)
{
// DO NOTHING
}
void SelectLayer::run()
{
-#define KERNEL_SELECT(type, op) \
- nnfw::cker::op(getTensorShape(_cond), reinterpret_cast<uint8_t *>(_cond->buffer()), \
- getTensorShape(_input_true), reinterpret_cast<type *>(_input_true->buffer()), \
- getTensorShape(_input_false), reinterpret_cast<type *>(_input_false->buffer()), \
- getTensorShape(_output), reinterpret_cast<type *>(_output->buffer()));
+#define KERNEL_SELECT(type, op) \
+ nnfw::cker::op(getShape(_cond), getBuffer<uint8_t>(_cond), getShape(_input_true), \
+ getBuffer<type>(_input_true), getShape(_input_false), \
+ getBuffer<type>(_input_false), getShape(_output), getBuffer<type>(_output));
#define KERNEL_SWITCH(type, op) \
switch (type) \
auto input_type = _input_true->data_type();
bool require_broadcast =
- !HaveSameShapes(_input_true, _cond) || !HaveSameShapes(_input_false, _cond);
- bool rank_one_select = ((_input_true->num_dimensions() == 1) && !require_broadcast);
+ !HaveSameShapes(_input_true, _cond) || !HaveSameShapes(_input_false, _cond);
+ bool rank_one_select = ((_input_true->getShape().rank() == 1) && !require_broadcast);
if (rank_one_select)
{
template <typename T> void GetRawShape(const IPortableTensor *input, T *output_data)
{
- for (uint32_t i = 0; i < input->num_dimensions(); ++i)
+ auto shape = input->getShape();
+ for (int i = 0; i < shape.rank(); ++i)
{
- output_data[i] = static_cast<T>(input->dimension(i));
+ output_data[i] = static_cast<T>(shape.dim(i));
}
}
{
if (_output->data_type() == OperandType::UINT32)
{
- GetRawShape(_input, reinterpret_cast<uint32_t *>(_output->buffer()));
+ GetRawShape(_input, getBuffer<uint32_t>(_output));
}
else if (_output->data_type() == OperandType::INT32)
{
- GetRawShape(_input, reinterpret_cast<int32_t *>(_output->buffer()));
+ GetRawShape(_input, getBuffer<int32_t>(_output));
}
else if (_output->data_type() == OperandType::INT64)
{
- GetRawShape(_input, reinterpret_cast<int64_t *>(_output->buffer()));
+ GetRawShape(_input, getBuffer<int64_t>(_output));
}
else
{
{
for (int idx = dimensions - 1; idx >= 0; --idx)
{
- begins->push_back(reinterpret_cast<T *>(begin->buffer())[idx]);
- sizes->push_back(reinterpret_cast<T *>(size->buffer())[idx]);
+ begins->push_back(getBuffer<T>(begin)[idx]);
+ sizes->push_back(getBuffer<T>(size)[idx]);
}
}
begins.reserve(kMaxDim);
sizes.reserve(kMaxDim);
- GetBeginAndSizeVectors<int32_t>(_input->num_dimensions(), _begin, _size, &begins, &sizes);
+ if (_begin->data_type() == OperandType::INT32)
+ {
+ GetBeginAndSizeVectors<int32_t>(_input->getShape().rank(), _begin, _size, &begins, &sizes);
+ }
+ else if (_begin->data_type() == OperandType::INT64)
+ {
+ GetBeginAndSizeVectors<int64_t>(_input->getShape().rank(), _begin, _size, &begins, &sizes);
+ }
+ else
+ {
+ throw std::runtime_error{"Slice: unsupported begin and/or size data type"};
+ }
// begins : 0-based, sizes : 1-based
- for (int i = _input->num_dimensions(); i < kMaxDim; ++i)
+ for (int i = _input->getShape().rank(); i < kMaxDim; ++i)
{
begins.push_back(0);
sizes.push_back(1);
op_params.size[i] = sizes[3 - i];
}
- nnfw::cker::Slice(op_params, getExtendedTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::Slice(op_params, getExtendedTensorShape(_input), getBuffer<T>(_input),
+ getBuffer<T>(_output));
}
void SliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin,
if (getNumberOfDimensions(_input) == 1)
{
uint32_t input_size = getNumberOfElements(_input);
- nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta,
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(getBuffer<float>(_input), input_size, 1, _beta, getBuffer<float>(_output));
}
else if (getNumberOfDimensions(_input) == 2)
{
throw std::runtime_error("batch_size should not be 0");
uint32_t input_size = getNumberOfElements(_input) / batch_size;
- nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size,
- _beta, reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(getBuffer<float>(_input), input_size, batch_size, _beta,
+ getBuffer<float>(_output));
}
else if (getNumberOfDimensions(_input) == 4)
{
nnfw::cker::SoftmaxParams op_params;
op_params.beta = _beta;
- nnfw::cker::Softmax(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(op_params, getShape(_input), getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
}
else
{
nnfw::cker::SoftmaxParams op_params;
op_params.beta = _beta;
- nnfw::cker::reference::Softmax(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::reference::Softmax(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_output), getBuffer<float>(_output));
}
}
-void SoftMaxLayer::softmaxQuant8()
+template <typename T> void SoftMaxLayer::softmaxQuant8()
{
- nnfw::cker::Shape descrIn4D(4);
-
- if (getNumberOfDimensions(_input) == 2)
- {
- auto batch_size = getSizeOfDimension(_input, 0);
- if (batch_size == 0)
- throw std::runtime_error("batch_size should not be 0");
-
- auto input_size = getNumberOfElements(_input) / batch_size;
- descrIn4D.SetDim(0, batch_size);
- descrIn4D.SetDim(1, 1);
- descrIn4D.SetDim(2, 1);
- descrIn4D.SetDim(3, input_size);
- }
- else if (getNumberOfDimensions(_input) == 4)
- {
- descrIn4D.SetDim(0, _input->dimension(0));
- descrIn4D.SetDim(1, _input->dimension(1));
- descrIn4D.SetDim(2, _input->dimension(2));
- descrIn4D.SetDim(3, _input->dimension(3));
- }
- else
- {
- throw std::runtime_error{"only 2D and 4D tensors supported"};
- }
- if (_output->data_offset() != 0 || _output->data_scale() != 1.f / 256)
- {
- throw std::runtime_error{"incorrect scale / offset for output"};
- }
- static const int32_t kScaledDiffIntegerBits = 5;
- const double input_beta_real_multiplier = std::min(
- 1.0 * _beta * _input->data_scale() * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0);
- int32_t input_multiplier = 0;
- int32_t input_left_shift = 0;
- QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier,
- &input_left_shift);
- float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
-
nnfw::cker::SoftmaxParams op_params;
- op_params.input_multiplier = input_multiplier;
- op_params.input_left_shift = input_left_shift;
- op_params.diff_min = diff_min;
- nnfw::cker::Softmax(op_params, descrIn4D, reinterpret_cast<const uint8_t *>(_input->buffer()),
- descrIn4D, reinterpret_cast<uint8_t *>(_output->buffer()));
+ op_params.scale = _output->data_scale();
+ op_params.zero_point = _output->data_zero_point();
+ op_params.uint8_table1 = _uint8_table1;
+ op_params.uint8_table2 = _uint8_table2;
+ op_params.table = _table;
+
+#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
+ nnfw::cker::SoftmaxInt8LUT<T, T>(op_params, getShape(_input), getBuffer<T>(_input),
+ getShape(_output), getBuffer<T>(_output));
+#else
+ nnfw::cker::Softmax<T, T>(op_params, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output));
+#endif
}
void SoftMaxLayer::configure(const IPortableTensor *input, const float beta,
_input = input;
_output = output;
_beta = beta;
+
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM ||
+ _input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
+ // Only applied when both input & output are uint8/int8 and built with clang
+ // on aarch64.
+ nnfw::cker::PopulateSoftmaxUInt8LookupTable(_uint8_table1, _uint8_table2, _input->data_scale(),
+ _beta);
+#else
+ nnfw::cker::PopulateSoftmaxLookupTable(_table, _input->data_scale(), _beta);
+#endif
+ }
}
void SoftMaxLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- softmaxFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- softmaxQuant8();
- }
- else
+ switch (_input->data_type())
{
- throw std::runtime_error{"SoftMax: unsupported data type"};
+ case OperandType::FLOAT32:
+ softmaxFloat32();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ softmaxQuant8<uint8_t>();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ softmaxQuant8<int8_t>();
+ break;
+ default:
+ throw std::runtime_error{"SoftMax: unsupported data type"};
}
}
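A minimal, self-contained sketch of the dispatch pattern used in SoftMaxLayer::run() above: a run-time OperandType switch selects a templated quantized kernel, so a single body serves both uint8 and int8. All names below (OperandKind, quantSoftmaxSketch, runSketch) are hypothetical stand-ins; the real kernels are the nnfw::cker LUT/softmax routines called from softmaxQuant8<T>().

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <vector>

enum class OperandKind
{
  FLOAT32,
  QUANT_UINT8_ASYMM,
  QUANT_INT8_ASYMM
};

// One templated body mirrors softmaxQuant8<T>(): dequantize, take a numerically
// stable softmax in float, then requantize into the output type.
template <typename T>
void quantSoftmaxSketch(const std::vector<T> &in, std::vector<T> &out, float in_scale,
                        float out_scale, int32_t out_zero_point)
{
  float max_v = -std::numeric_limits<float>::infinity();
  for (T v : in)
    max_v = std::max(max_v, in_scale * static_cast<float>(v));
  std::vector<float> e(in.size());
  float sum = 0.f;
  for (std::size_t i = 0; i < in.size(); ++i)
  {
    e[i] = std::exp(in_scale * static_cast<float>(in[i]) - max_v);
    sum += e[i];
  }
  for (std::size_t i = 0; i < in.size(); ++i)
  {
    int32_t q = static_cast<int32_t>(std::lround(e[i] / (sum * out_scale))) + out_zero_point;
    q = std::max<int32_t>(std::numeric_limits<T>::min(),
                          std::min<int32_t>(std::numeric_limits<T>::max(), q));
    out[i] = static_cast<T>(q);
  }
}

// Run-time type switch -> compile-time template instantiation, as in run() above.
void runSketch(OperandKind kind)
{
  switch (kind)
  {
    case OperandKind::QUANT_UINT8_ASYMM:
    {
      std::vector<uint8_t> in{0, 128, 255}, out(3);
      quantSoftmaxSketch<uint8_t>(in, out, /*in_scale=*/0.1f, /*out_scale=*/1.f / 256, /*zp=*/0);
      break;
    }
    case OperandKind::QUANT_INT8_ASYMM:
    {
      std::vector<int8_t> in{-128, 0, 127}, out(3);
      quantSoftmaxSketch<int8_t>(in, out, /*in_scale=*/0.1f, /*out_scale=*/1.f / 256, /*zp=*/-128);
      break;
    }
    default:
      throw std::runtime_error{"sketch: unsupported data type"};
  }
}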
public:
void softmaxFloat32();
- void softmaxQuant8();
+ template <typename T> void softmaxQuant8();
void configure(const IPortableTensor *input, const float beta, IPortableTensor *output);
IPortableTensor *_output;
float _beta;
+
+ float _table[256];
+ uint8_t _uint8_table1[256];
+ uint8_t _uint8_table2[256];
};
} // namespace ops
namespace ops
{
SpaceToBatchNDLayer::SpaceToBatchNDLayer()
- : _input(nullptr), _block_shape(nullptr), _padding(nullptr), _output(nullptr)
+ : _input(nullptr), _block_shape(nullptr), _padding(nullptr), _output(nullptr)
{
// DO NOTHING
}
void SpaceToBatchNDLayer::checkDimension()
{
const int kSpatialDimensionNum = 2;
- if (_block_shape->dimension(0) != kSpatialDimensionNum)
+ if (_block_shape->getShape().dim(0) != kSpatialDimensionNum)
{
throw std::runtime_error("SpaceToBatchND : block_shape(block_size) tensor's rank is wrong\n");
}
// shape height and width.
for (int dim = 0; dim < kSpatialDimensionNum; ++dim)
{
- int final_dim_size =
- (_input->dimension(dim + 1) + reinterpret_cast<int32_t *>(_padding->buffer())[dim * 2] +
- reinterpret_cast<int32_t *>(_padding->buffer())[dim * 2 + 1]);
+ int final_dim_size = (_input->getShape().dim(dim + 1) + getBuffer<int32_t>(_padding)[dim * 2] +
+ getBuffer<int32_t>(_padding)[dim * 2 + 1]);
- if (final_dim_size % reinterpret_cast<int32_t *>(_block_shape->buffer())[dim] != 0)
+ if (final_dim_size % getBuffer<int32_t>(_block_shape)[dim] != 0)
{
throw std::runtime_error(
- "SpaceToBatchND : padded input's dimension is not a multiple of block size\n");
+ "SpaceToBatchND : padded input's dimension is not a multiple of block size\n");
}
- if ((int32_t)_output->dimension(dim + 1) !=
- final_dim_size / reinterpret_cast<int32_t *>(_block_shape->buffer())[dim])
+ if ((int32_t)_output->getShape().dim(dim + 1) !=
+ final_dim_size / getBuffer<int32_t>(_block_shape)[dim])
{
throw std::runtime_error("SpaceToBatchND : wrong output dimension\n");
}
}
template <> uint32_t SpaceToBatchNDLayer::getPad<float>() { return 0; }
-template <> uint32_t SpaceToBatchNDLayer::getPad<uint8_t>() { return _output->data_offset(); }
+template <> uint32_t SpaceToBatchNDLayer::getPad<uint8_t>() { return _output->data_zero_point(); }
template <typename T> void SpaceToBatchNDLayer::spaceToBatchND()
{
nnfw::cker::SpaceToBatchParams params;
params.output_offset = getPad<T>();
- nnfw::cker::SpaceToBatchND(
- params, getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
- getTensorShape(_block_shape), reinterpret_cast<const int32_t *>(_block_shape->buffer()),
- getTensorShape(_padding), reinterpret_cast<const int32_t *>(_padding->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::SpaceToBatchND(params, getShape(_input), getBuffer<T>(_input), getShape(_block_shape),
+ getBuffer<int32_t>(_block_shape), getShape(_padding),
+ getBuffer<int32_t>(_padding), getShape(_output),
+ getBuffer<T>(_output));
}
void SpaceToBatchNDLayer::configure(const IPortableTensor *input,
nnfw::cker::SpaceToDepthParams params;
params.block_size = _block_size;
- nnfw::cker::SpaceToDepth(params, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::SpaceToDepth(params, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output));
}
void SpaceToDepthLayer::configure(const IPortableTensor *input, const int32_t block_size,
{
throw std::runtime_error("ArgMinMax: wrong shape of axis");
}
- auto axis = *reinterpret_cast<const int32_t *>(_axis->buffer());
+ auto axis = *getBuffer<int32_t>(_axis);
if (axis < 0)
{
- axis += _input->num_dimensions();
+ axis += _input->getShape().rank();
}
op_params.axis = axis;
op_params.num_split = _num_splits;
for (const auto output : _outputs)
{
assert(output->total_size() == sizeOfData(output->data_type(), output->getShape().dims()));
- outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
+ outputPtrs.emplace_back(getBuffer<T>(output));
}
assert(_input->total_size() == sizeOfData(_input->data_type(), _input->getShape().dims()));
- nnfw::cker::Split<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
- getTensorShape(_outputs[0]), outputPtrs.data());
+ nnfw::cker::Split<T>(op_params, getShape(_input), getBuffer<T>(_input), getShape(_outputs[0]),
+ outputPtrs.data());
}
void SplitLayer::configure(const IPortableTensor *input, const IPortableTensor *axis,
{
SplitVLayer::SplitVLayer()
- : _input(nullptr), _size_splits(nullptr), _split_dim(nullptr), _num_splits(0), _outputs()
+ : _input(nullptr), _size_splits(nullptr), _split_dim(nullptr), _num_splits(0), _outputs()
{
// DO NOTHING
}
template <typename T> void SplitVLayer::splitV(void)
{
nnfw::cker::SplitVParams op_params;
- op_params.axis = *(reinterpret_cast<const int32_t *>(_split_dim->buffer()));
+ op_params.axis = *getBuffer<int32_t>(_split_dim);
op_params.num_split = _num_splits;
std::vector<T *> outputPtrs;
for (const auto output : _outputs)
{
assert(output->total_size() == sizeOfData(output->data_type(), output->getShape().dims()));
- outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
- outshape.emplace_back(getTensorShape(output));
+ outputPtrs.emplace_back(getBuffer<T>(output));
+ outshape.emplace_back(getShape(output));
}
assert(_input->total_size() == sizeOfData(_input->data_type(), _input->getShape().dims()));
- nnfw::cker::SplitV<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
- outshape, outputPtrs.data());
+ nnfw::cker::SplitV<T>(op_params, getShape(_input), getBuffer<T>(_input), outshape,
+ outputPtrs.data());
}
void SplitVLayer::configure(const IPortableTensor *input, const IPortableTensor *size_splits,
void SqDiffLayer::SqDiffFloat32()
{
- nnfw::cker::SqDiff(getTensorShape(_input1), reinterpret_cast<const float *>(_input1->buffer()),
- getTensorShape(_input2), reinterpret_cast<const float *>(_input2->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::SqDiff(getShape(_input1), getBuffer<float>(_input1), getShape(_input2),
+ getBuffer<float>(_input2), getShape(_output), getBuffer<float>(_output));
}
void SqDiffLayer::configure(const IPortableTensor *input1, const IPortableTensor *input2,
{
StatelessRandomUniformLayer::StatelessRandomUniformLayer()
- : _shape(nullptr), _seed(nullptr), _output(nullptr)
+ : _shape(nullptr), _seed(nullptr), _output(nullptr)
{
// DO NOTHING
}
void StatelessRandomUniformLayer::StatelessRandomUniformFloat32()
{
- nnfw::cker::StatelessRandomUniform(
- getTensorShape(_shape), reinterpret_cast<const int *>(_shape->buffer()),
- getTensorShape(_seed), reinterpret_cast<const int *>(_seed->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::StatelessRandomUniform(getShape(_shape), getBuffer<int32_t>(_shape), getShape(_seed),
+ getBuffer<int32_t>(_seed), getShape(_output),
+ getBuffer<float>(_output));
}
void StatelessRandomUniformLayer::run()
{
StridedSliceLayer::StridedSliceLayer()
- : _input(nullptr), _begin(nullptr), _end(nullptr), _strides(nullptr), _output(nullptr),
- _begin_mask(0), _ellipsis_mask(0), _end_mask(0), _new_axis_mask(0), _shrink_axis_mask(0)
+ : _input(nullptr), _begin(nullptr), _end(nullptr), _strides(nullptr), _output(nullptr),
+ _begin_mask(0), _ellipsis_mask(0), _end_mask(0), _new_axis_mask(0), _shrink_axis_mask(0)
{
}
template <typename T> void StridedSliceLayer::stridedSliceImpl()
{
- const auto input_shape = getTensorShape(_input);
- const auto output_shape = getTensorShape(_output);
+ const auto input_shape = getShape(_input);
+ const auto output_shape = getShape(_output);
auto op_params = nnfw::cker::buildStridedSliceParams(
- reinterpret_cast<uint32_t *>(_begin->buffer()), reinterpret_cast<uint32_t *>(_end->buffer()),
- reinterpret_cast<uint32_t *>(_strides->buffer()), _begin_mask, _end_mask, _shrink_axis_mask,
- input_shape.DimensionsCount());
+ getBuffer<uint32_t>(_begin), getBuffer<uint32_t>(_end), getBuffer<uint32_t>(_strides),
+ _begin_mask, _end_mask, _shrink_axis_mask, input_shape.DimensionsCount());
nnfw::cker::checkOutputSize(op_params, input_shape, output_shape, input_shape.DimensionsCount());
- nnfw::cker::StridedSlice(op_params, input_shape, reinterpret_cast<const T *>(_input->buffer()),
- output_shape, reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::StridedSlice(op_params, input_shape, getBuffer<T>(_input), output_shape,
+ getBuffer<T>(_output));
}
void StridedSliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin,
void TileLayer::tileFloat32()
{
- TileOneDimension(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<const int *>(_multipliers->buffer()),
- reinterpret_cast<float *>(_output->buffer()), 0);
+ TileOneDimension(getShape(_input), getBuffer<float>(_input), getBuffer<int>(_multipliers),
+ getBuffer<float>(_output), 0);
}
void TileLayer::tileQuant8()
template <typename T> void TransposeLayer::transpose()
{
nnfw::cker::TransposeParams param;
- assert(_perm->num_dimensions() == 1);
+ auto perm_shape = _perm->getShape();
+ assert(perm_shape.rank() == 1);
- param.perm_count = _input->num_dimensions();
- if (_perm->dimension(0) == 0) // This means _perm is (n-1...0)
+ param.perm_count = _input->getShape().rank();
+ if (perm_shape.dim(0) == 0) // This means _perm is (n-1...0)
{
const auto begin = param.perm;
- const auto end = param.perm + _input->num_dimensions();
+ const auto end = param.perm + _input->getShape().rank();
std::iota(begin, end, 0);
std::reverse(begin, end);
}
else
{
- assert(param.perm_count == static_cast<int>(_perm->dimension(0)));
+ assert(param.perm_count == static_cast<int>(perm_shape.dim(0)));
for (auto i = 0; i < param.perm_count; i++)
{
- param.perm[i] = *(reinterpret_cast<const int32_t *>(_perm->buffer()) + i);
+ param.perm[i] = *(getBuffer<int32_t>(_perm) + i);
}
}
- nnfw::cker::Transpose(param, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::Transpose(param, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output));
}
void TransposeLayer::transposeQuant8()
{
- if (_input->data_offset() != _output->data_offset())
+ if (_input->data_zero_point() != _output->data_zero_point())
{
throw std::runtime_error("TransposeLayer : qassym8 input and output offsets unmatched");
}
for (int32_t i = 0; i < _num_output; i++)
{
- outputDims.push_back(getTensorShape(_outputs[i]));
+ outputDims.push_back(getShape(_outputs[i]));
outputDimsPtr.push_back(&outputDims[i]);
}
for (const auto output : _outputs)
{
- outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
+ outputPtrs.emplace_back(getBuffer<T>(output));
}
- nnfw::cker::Unpack<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
- getTensorShape(_outputs[0]), outputPtrs.data());
+ nnfw::cker::Unpack<T>(op_params, getShape(_input), getBuffer<T>(_input), getShape(_outputs[0]),
+ outputPtrs.data());
}
void UnpackLayer::configure(const IPortableTensor *input, uint32_t axis, int32_t num,
#include "BackendContext.h"
#include "Config.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include <backend/Backend.h>
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<onert::backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
- auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto custom_kernel_builder = data.custom_kernel_builder;
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
context->external_context());
return context;
}
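The newContext(ContextData &&data) override above reads data.custom_kernel_builder and data.graph before handing the whole ContextData off with std::move, because the BackendContext constructor takes ownership of it. A minimal sketch of that ordering, using hypothetical stand-in types (DataSketch, ContextSketch) rather than the real onert classes:

#include <memory>
#include <utility>

struct DataSketch
{
  const int *graph;                    // stands in for the graph pointer
  std::shared_ptr<int> kernel_builder; // stands in for the custom kernel builder
};

struct ContextSketch
{
  explicit ContextSketch(DataSketch &&d) : data(std::move(d)) {}
  DataSketch data;
};

std::unique_ptr<ContextSketch> newContextSketch(DataSketch &&data)
{
  // Copy out what is still needed *before* the move; afterwards `data` is consumed.
  auto kernel_builder = data.kernel_builder;
  const auto *graph = data.graph;
  auto ctx = std::make_unique<ContextSketch>(std::move(data));
  // ... the tensor registry/builder and kernel generator would be built from
  // kernel_builder and graph here, then attached to ctx.
  (void)kernel_builder;
  (void)graph;
  return ctx;
}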
#include "ir/Index.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandIndexSequence.h"
-#include "backend/cpu_common/BackendContextHelpers.h"
+#include "backend/basic/BackendContextHelpers.h"
namespace onert
{
namespace ruy
{
-void BackendContext::initConsts()
-{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
-{
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (auto index : operand_list())
- {
- if (model_io.contains(index))
- continue;
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = [&]() {
- if (obj.getUses().size() == 0)
- return ir::Layout::UNKNOWN;
- auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
- for (auto &operation_info : operation_list())
- {
- if (operation_info.index == use_op_ind)
- return operation_info.layout;
- }
- return ir::Layout::UNKNOWN;
- }();
- const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
- if (permute_factor.backend() != backend())
- continue;
- const auto backend_layout = permute_factor.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
-
- // TODO Get compiler options from compiler, and use it rather than getting it from Env
- if (util::getConfigString(util::config::EXECUTOR) == "Linear")
- {
- cpu_common::planTensors(*this, order, op_seqs, lower_info);
- }
- else
- {
- // For the executors that does not have fixed linear execution order:
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto ind : operand_list())
- {
- if (tensor_builder->isRegistered(ind))
- tensor_builder->notifyFirstUse(ind);
- }
- }
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
- tensor_builder->prepare();
-
- return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
+FunctionMap BackendContext::genKernels()
{
FunctionMap ret;
- for (auto op_seq_ind : order)
+ for (auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
}
- initConsts();
+ basic::initConsts(*this);
// NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
for (auto &it : ret)
{
#include <backend/BackendContext.h>
#include "TensorBuilder.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "ExternalContext.h"
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, _external_context(new ExternalContext)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _external_context(new ExternalContext)
{
}
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
+ ITensorRegistry *genTensors() override;
- FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
+ FunctionMap genKernels() override;
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
private:
- void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+ void planTensors(const std::vector<onert::ir::OperationIndex> &order,
+ const compiler::GraphLowerInfo &lower_info);
public:
// TODO Make it private
std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
std::shared_ptr<KernelGenerator> kernel_gen;
private:
ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
-} // namespace cpu
+} // namespace ruy
} // namespace backend
} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
-
-#include <backend/cpu_common/ConstantInitializer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace ruy
-{
-
-using ConstantInitializer = cpu_common::ConstantInitializer;
-
-} // namespace ruy
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
#include <util/ConfigSource.h>
#include <ruy/context.h>
-namespace
-{
-const int kDefaultNumThreadpoolThreads = 4;
-}
-
namespace onert
{
namespace backend
class ExternalContext
{
+private:
+ static const int kDefaultNumThreadpoolThreads = 4;
+
public:
ExternalContext() : _ruy_context(new ::ruy::Context)
{
void setMaxNumThreads(int max_num_threads)
{
const int target_num_threads =
- max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
_ruy_context->set_max_num_threads(target_num_threads);
}
namespace ruy
{
-KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
- const std::shared_ptr<ExternalContext> &external_context)
- : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- // DO NOTHING
-}
+ auto ret = std::make_unique<exec::FunctionSequence>();
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- assert(!_return_fn_seq);
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->op_ind = ind;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ ret->dynamic_tensor_ctx(dyn_ctx);
}
- _current_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ for (auto ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
{
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
{
- auto portable_tensor = _tensor_reg->getPortableTensor(ind);
- if (portable_tensor)
- {
- assert(portable_tensor->layout() == ir::Layout::NHWC);
- }
-
- auto tensor = _tensor_reg->getNativeTensor(ind);
- if (tensor)
- {
- tensor->increase_ref();
- }
+ tensor->increase_ref();
}
}
+ return ret;
+}
+
+KernelGenerator::KernelGenerator(
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _external_context(external_context)
+{
+ // DO NOTHING
}
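With this change KernelGenerator::generate(ind) returns one exec::FunctionSequence per operation, and BackendContext::genKernels() simply walks _data.op_order. A simplified sketch of that contract, using hypothetical stand-in types (FnSeqSketch, genKernelsSketch); the real FunctionSequence additionally carries the dynamic-tensor context set up above:

#include <cstddef>
#include <functional>
#include <memory>
#include <utility>
#include <vector>

using OpIndexSketch = std::size_t;

struct FnSeqSketch
{
  std::vector<std::function<void()>> fns;
  void append(std::function<void()> f) { fns.push_back(std::move(f)); }
  void run()
  {
    for (auto &f : fns)
      f();
  }
};

// genKernels() walks the lowered execution order and asks the kernel generator
// for exactly one function sequence per operation index.
std::vector<std::pair<OpIndexSketch, std::unique_ptr<FnSeqSketch>>>
genKernelsSketch(const std::vector<OpIndexSketch> &op_order,
                 const std::function<std::unique_ptr<FnSeqSketch>(OpIndexSketch)> &generate)
{
  std::vector<std::pair<OpIndexSketch, std::unique_ptr<FnSeqSketch>>> ret;
  for (auto ind : op_order)
    ret.emplace_back(ind, generate(ind));
  return ret;
}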
void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
#include "ExternalContext.h"
#include "TensorBuilder.h"
-#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/basic/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <backend/basic/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
namespace ruy
{
-class KernelGenerator : public cpu_common::KernelGeneratorBase
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::FullyConnected &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ const ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
- std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
-#include "backend/cpu_common/StaticTensorManager.h"
+#include "backend/basic/StaticTensorManager.h"
namespace onert
{
namespace ruy
{
-using StaticTensorManager = cpu_common::StaticTensorManager;
+using StaticTensorManager = basic::StaticTensorManager;
} // namespace ruy
} // namespace backend
#ifndef __ONERT_BACKEND_RUY_TENSOR_H__
#define __ONERT_BACKEND_RUY_TENSOR_H__
-#include <backend/cpu_common/Tensor.h>
+#include <backend/basic/Tensor.h>
#include <ir/Data.h>
namespace onert
namespace ruy
{
-using Tensor = cpu_common::Tensor;
-using ExternalTensor = cpu_common::ExternalTensor;
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
} // namespace ruy
} // namespace backend
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <util/logging.h>
-
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace ruy
-{
-
-TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg},
- _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
- _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
-{
- /* empty */
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout layout)
-{
- _tensor_info_map.emplace(ind, info);
-
- // CPU backend supports only one layout as NHWC
- assert(layout == ir::Layout::NHWC);
- if (info.isDynamic())
- {
- _dynamic_tensor_mgr->buildTensor(ind, info, layout);
- }
- else
- {
- _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
- }
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
- const auto tensor_info = _tensor_info_map.at(ind);
-
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- const auto size = tensor_info.total_size();
- _static_tensor_mgr->claimPlan(ind, size);
- }
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
-{
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- _static_tensor_mgr->releasePlan(ind);
- }
-}
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
-
-void TensorBuilder::allocate()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-} // namespace ruy
-} // namespace backend
-} // namespace onert
#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
-#include <backend/cpu_common/DynamicTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-
-#include <ir/OperandIndexMap.h>
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <unordered_map>
+#include <backend/basic/TensorBuilder.h>
namespace onert
{
namespace ruy
{
-class TensorBuilder
-{
-public:
- TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
-
- /**
- * @brief Register tensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout);
-
- void notifyFirstUse(const ir::OperandIndex &);
- void notifyLastUse(const ir::OperandIndex &);
-
- bool isRegistered(const ir::OperandIndex &) const;
-
- void prepare(void);
- void allocate();
- void postFunctionPrepare() { /* DO NOTHING */}
-
- IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
-
-private:
- const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
-};
+using TensorBuilder = basic::TensorBuilder;
} // namespace ruy
} // namespace backend
namespace ops
{
ConvolutionLayer::ConvolutionLayer()
- : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
- _dilationHeightFactor(1), _activation(ir::Activation::NONE),
- _conv_kernel(new nnfw::ruy::Conv()), _prepare(false)
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+ _conv_kernel(new nnfw::ruy::Conv()), _prepare(false)
{
// DO NOTHING
}
param_padding.param.bottom = _paddingBottom;
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- _dilationWidthFactor, _dilationHeightFactor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
_paddingLeft = padding.left;
_paddingRight = padding.right;
{
FullyConnectedLayer::FullyConnectedLayer()
- : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
- _activation(ir::Activation::NONE), _external_context(nullptr)
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _external_context(nullptr)
{
// DO NOTHING
}
op_params.rhs_cacheable = _input->is_constant();
nnfw::ruy::FullyConnected(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- _external_context->ruy_context());
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
}
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
#include "BackendContext.h"
#include "Config.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include <backend/Backend.h>
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<onert::backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
- auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto custom_kernel_builder = data.custom_kernel_builder;
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
context->external_context());
return context;
}
#include "ir/Index.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandIndexSequence.h"
-#include "backend/cpu_common/BackendContextHelpers.h"
+#include "backend/basic/BackendContextHelpers.h"
namespace onert
{
namespace xnnpack
{
-void BackendContext::initConsts()
-{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
-{
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (auto index : operand_list())
- {
- if (model_io.contains(index))
- continue;
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = [&]() {
- if (obj.getUses().size() == 0)
- return ir::Layout::UNKNOWN;
- auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
- for (auto &operation_info : operation_list())
- {
- if (operation_info.index == use_op_ind)
- return operation_info.layout;
- }
- return ir::Layout::UNKNOWN;
- }();
- const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
- if (permute_factor.backend() != backend())
- continue;
- const auto backend_layout = permute_factor.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
-
- // TODO Get compiler options from compiler, and use it rather than getting it from Env
- if (util::getConfigString(util::config::EXECUTOR) == "Linear")
- {
- cpu_common::planTensors(*this, order, op_seqs, lower_info);
- }
- else
- {
- // For the executors that does not have fixed linear execution order:
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto ind : operand_list())
- {
- if (tensor_builder->isRegistered(ind))
- tensor_builder->notifyFirstUse(ind);
- }
- }
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
- tensor_builder->prepare();
-
- return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
+FunctionMap BackendContext::genKernels()
{
FunctionMap ret;
- for (auto op_seq_ind : order)
+ for (auto op_ind : _data.op_order)
{
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
}
- initConsts();
+ basic::initConsts(*this);
// NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
for (auto &it : ret)
{
#include <backend/BackendContext.h>
#include <util/ConfigSource.h>
#include "TensorBuilder.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "ExternalContext.h"
-namespace
-{
const int kDefaultNumThreadpoolThreads = 1;
-}
namespace onert
{
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, _external_context(nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _external_context(nullptr)
{
int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS);
if (num_threads < 1)
_external_context.reset(new ExternalContext(static_cast<size_t>(num_threads)));
}
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
-
- FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
-private:
- void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
-
public:
// TODO Make it private
std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
std::shared_ptr<KernelGenerator> kernel_gen;
private:
ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
-} // namespace cpu
+} // namespace xnnpack
} // namespace backend
} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
-
-#include <backend/cpu_common/ConstantInitializer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace xnnpack
-{
-
-using ConstantInitializer = cpu_common::ConstantInitializer;
-
-} // namespace xnnpack
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
{
ExternalContext::ExternalContext(size_t num_threads)
- : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy)
+ : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy)
{
assert(_threadpool);
}
{
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
- const std::shared_ptr<ExternalContext> &external_context)
- : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _external_context(external_context)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- assert(!_return_fn_seq);
+ auto ret = std::make_unique<exec::FunctionSequence>();
+
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->op_ind = ind;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ ret->dynamic_tensor_ctx(dyn_ctx);
}
- _current_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ for (auto ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
{
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
{
- auto portable_tensor = _tensor_reg->getPortableTensor(ind);
- if (portable_tensor)
- {
- assert(portable_tensor->layout() == ir::Layout::NHWC);
- }
-
- auto tensor = _tensor_reg->getNativeTensor(ind);
- if (tensor)
- {
- tensor->increase_ref();
- }
+ tensor->increase_ref();
}
}
+ return ret;
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
#include "ExternalContext.h"
#include "TensorBuilder.h"
-#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/basic/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <backend/basic/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
namespace xnnpack
{
-class KernelGenerator : public cpu_common::KernelGeneratorBase
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
void visit(const ir::operation::FullyConnected &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
- std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
-#include "backend/cpu_common/StaticTensorManager.h"
+#include "backend/basic/StaticTensorManager.h"
namespace onert
{
namespace xnnpack
{
-using StaticTensorManager = cpu_common::StaticTensorManager;
+using StaticTensorManager = basic::StaticTensorManager;
} // namespace xnnpack
} // namespace backend
#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__
#define __ONERT_BACKEND_XNNPACK_TENSOR_H__
-#include <backend/cpu_common/Tensor.h>
+#include <backend/basic/Tensor.h>
#include <ir/Data.h>
namespace onert
namespace xnnpack
{
-using Tensor = cpu_common::Tensor;
-using ExternalTensor = cpu_common::ExternalTensor;
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
} // namespace xnnpack
} // namespace backend
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <util/logging.h>
-
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace xnnpack
-{
-
-TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg},
- _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
- _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
-{
- /* empty */
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout layout)
-{
- _tensor_info_map.emplace(ind, info);
-
- // XNNPACK backend supports only one layout as NHWC
- assert(layout == ir::Layout::NHWC);
- if (info.isDynamic())
- {
- _dynamic_tensor_mgr->buildTensor(ind, info, layout);
- }
- else
- {
- _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
- }
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
- const auto tensor_info = _tensor_info_map.at(ind);
-
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- const auto size = tensor_info.total_size();
- _static_tensor_mgr->claimPlan(ind, size);
- }
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
-{
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- _static_tensor_mgr->releasePlan(ind);
- }
-}
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
-
-void TensorBuilder::allocate()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-} // namespace xnnpack
-} // namespace backend
-} // namespace onert
#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
-#include <backend/cpu_common/DynamicTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-
-#include <ir/OperandIndexMap.h>
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <unordered_map>
+#include <backend/basic/TensorBuilder.h>
namespace onert
{
namespace xnnpack
{
-class TensorBuilder
-{
-public:
- TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
-
- /**
- * @brief Register tensor information to allocate on XNNPACK backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout);
-
- void notifyFirstUse(const ir::OperandIndex &);
- void notifyLastUse(const ir::OperandIndex &);
-
- bool isRegistered(const ir::OperandIndex &) const;
-
- void prepare(void);
- void allocate();
- void postFunctionPrepare() { /* DO NOTHING */}
-
- IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
-
-private:
- const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
-};
+using TensorBuilder = basic::TensorBuilder;
} // namespace xnnpack
} // namespace backend
namespace ops
{
ConvolutionLayer::ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context)
- : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
- _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0),
- _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), _padding_right(0),
+ _padding_bottom(0), _stride_width(0), _stride_height(0), _dilation_width_factor(1),
+ _dilation_height_factor(1), _activation(ir::Activation::NONE)
{
// DO NOTHING
}
assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
- _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
- _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
- 1 /* groups */, input_channels /* group_input_channels */,
- output_channels /* group_output_channels */, input_channels /* input_channel_stride */,
- output_channels /* output_channel_stride */,
- reinterpret_cast<const float *>(_kernel->buffer()),
- reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
- output_activation_max, 0, &_kernel_op);
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor, 1 /* groups */,
+ input_channels /* group_input_channels */, output_channels /* group_output_channels */,
+ input_channels /* input_channel_stride */, output_channels /* output_channel_stride */,
+ reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, output_activation_max,
+ 0, &_kernel_op);
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 Convolution operator"};
uint32_t input_height = _input->getShape().dim(1);
uint32_t batch_size = _input->getShape().dim(0);
enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
- _kernel_op, batch_size, input_height, input_width,
- reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->getThreadPool());
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 Convolution operator"};
{
DepthwiseConvolutionLayer::DepthwiseConvolutionLayer(
- const std::shared_ptr<ExternalContext> external_context)
- : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
- _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
- _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+ const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), _padding_right(0),
+ _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
+ _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
{
// DO NOTHING
}
void DepthwiseConvolutionLayer::configure(
- const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
- ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
- const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
- const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
- const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
+ const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+ ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
+ const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
{
_input = input;
_kernel = kernel;
assert(output_channels == input_channels * _multiplier);
enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
- _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
- _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
- input_channels /* groups */, 1 /* group_input_channels */,
- _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
- output_channels /* output_channel_stride */,
- reinterpret_cast<const float *>(_kernel->buffer()),
- reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
- output_activation_max, XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+ input_channels /* groups */, 1 /* group_input_channels */,
+ _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
+ output_channels /* output_channel_stride */, reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, output_activation_max,
+ XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
uint32_t input_height = _input->getShape().dim(1);
uint32_t batch_size = _input->getShape().dim(0);
enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
- _kernel_op, batch_size, input_height, input_width,
- reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->getThreadPool());
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
{
FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
- : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _activation(ir::Activation::NONE)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE)
{
// DO NOTHING
}
const float *bias_buffer = (_bias) ? reinterpret_cast<const float *>(_bias->buffer()) : nullptr;
enum xnn_status status = xnn_create_fully_connected_nc_f32(
- input_channels, output_channels, input_channels /* input stride */,
- output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
- output_activation_max, flag, &_kernel_op);
+ input_channels, output_channels, input_channels /* input stride */,
+ output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
+ output_activation_max, flag, &_kernel_op);
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
enum xnn_status status = xnn_setup_fully_connected_nc_f32(
- _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
{
public:
Layer(const std::shared_ptr<ExternalContext> external_context)
- : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context}
+ : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context}
{
// DO NOTHING
}
endif(NOT ENABLE_TEST)
# Unit Tests
-set(TEST_ONERT_BACKEND_CPU_COMMON test_onert_backend_cpu_common)
+set(TEST_ONERT_CORE test_onert_core)
-add_executable(${TEST_ONERT_BACKEND_CPU_COMMON} ${TESTS})
+add_executable(${TEST_ONERT_CORE} ${TESTS})
-target_link_libraries(${TEST_ONERT_BACKEND_CPU_COMMON} onert_core)
-target_link_libraries(${TEST_ONERT_BACKEND_CPU_COMMON} gtest gtest_main dl ${LIB_PTHREAD})
+target_link_libraries(${TEST_ONERT_CORE} onert_core)
+target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD})
-add_test(${TEST_ONERT_BACKEND_CPU_COMMON} ${TEST_ONERT_BACKEND_CPU_COMMON})
-install(TARGETS ${TEST_ONERT_BACKEND_CPU_COMMON} DESTINATION unittest_standalone)
+add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE})
+install(TARGETS ${TEST_ONERT_CORE} DESTINATION unittest_standalone)
virtual ~Backend() = default;
virtual std::shared_ptr<onert::backend::IConfig> config() const = 0;
- virtual std::unique_ptr<BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<backend::custom::IKernelBuilder> &kb,
- bool is_linear_executor) const = 0;
+ virtual std::unique_ptr<BackendContext> newContext(ContextData &&) const = 0;
};
} // namespace backend
#include <memory>
#include "ir/Graph.h"
-#include "ir/LowerInfoMap.h"
+#include "ir/OperationIndexMap.h"
+#include "ir/OperandIndexMap.h"
+#include "compiler/GraphLowerInfo.h"
#include "exec/FunctionSequence.h"
namespace onert
struct ITensorRegistry;
using FunctionMap =
- std::vector<std::pair<ir::OpSequenceIndex, std::unique_ptr<exec::FunctionSequence>>>;
+ std::vector<std::pair<ir::OperationIndex, std::unique_ptr<exec::FunctionSequence>>>;
-class BackendContext
+struct ContextData
{
-public:
- struct OperationInfo
- {
- ir::OperationIndex index;
- ir::Layout layout;
-
- OperationInfo(ir::OperationIndex index, ir::Layout layout) : index{index}, layout{layout} {}
- };
+ /* A partial graph that only includes used operands/operations of the original graph */
+ std::unique_ptr<ir::Graph> graph;
+ /* A linear order of operations. This is necessary for when a graph is not fully connected */
+ std::vector<onert::ir::OperationIndex> op_order;
+ /* Operands that are defined by other backends */
+ util::Set<ir::OperandIndex> external_operands;
+ /* Operand layout info */
+ ir::OperandIndexMap<ir::Layout> operand_layouts;
+ /* Custom kernel builder */
+ std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder;
+ /* Is linear executor or not */
+ bool is_linear_executor;
+};
+class BackendContext
+{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr)
- : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry}
+ : _backend{backend}, _data{std::move(data)}, tensor_registry{tensor_registry}
{
}
virtual ~BackendContext() = default;
- void initialize(const std::vector<OperationInfo> &operation_list,
- const std::vector<ir::OperandIndex> &operand_list);
- void initConsts();
-
const Backend *backend() const { return _backend; }
- const ir::Graph *graph() const { return _graph; }
- const std::vector<OperationInfo> &operation_list() const { return _operation_list; }
- const std::vector<ir::OperandIndex> &operand_list() const { return _operand_list; }
+ const ir::Graph *graph() const { return _data.graph.get(); }
+ const util::Set<ir::OperandIndex> &external_operands() const { return _data.external_operands; }
+ const ir::OperandIndexMap<ir::Layout> &operand_layouts() const { return _data.operand_layouts; }
+ const ContextData &data() const { return _data; }
- virtual ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &,
- const ir::OpSequences &, const ir::LowerInfoMap &)
- {
- return nullptr;
- }
- virtual FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &,
- const ir::OpSequences &)
- {
- return {};
- }
+ virtual ITensorRegistry *genTensors() = 0;
+ virtual FunctionMap genKernels() = 0;
-private:
+protected:
const Backend *_backend{nullptr};
- const ir::Graph *_graph{nullptr};
- std::vector<OperationInfo> _operation_list;
- std::vector<ir::OperandIndex> _operand_list;
+ ContextData _data;
public:
std::shared_ptr<ITensorRegistry> tensor_registry;
*/
virtual ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) = 0;
/**
- * @brief The function that is called after each OpSequence run on profiling mode.
+ * @brief The function that is called after each Operation run in profiling mode.
* This may be useful for profiling GPU-based or special computing units.
*/
virtual void sync() const {}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IDYNAMICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_IDYNAMICTENSOR_MANAGER_H__
-
-#include "ITensorManager.h"
-
-#include <ir/Index.h>
-#include <ir/Operation.h>
-#include <ir/Shape.h>
-#include <backend/ITensor.h>
-
-namespace onert
-{
-namespace backend
-{
-
-/**
- * @brief Interface as an abstract tensor manager, providing ways to handle memory
- * for dynamic tensors.
- */
-struct IDynamicTensorManager : public ITensorManager
-{
- virtual ~IDynamicTensorManager() = default;
-
-public:
- /**
- * @brief Plan when to delete a tensor. Note this planning is done at compilation time.
- * @param op_ind operation index
- * @param tensor candidate ITensor to dealloc. Tensor can be static
- * or dynamic since tensor type may not be clearly known at compilation time.
- */
- virtual void planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor) = 0;
-
- /**
- * @brief Deallocate input tensors of op if an input tensor is a dynamic tensor and it won't
- * be used anymore
- * @note This will work after calling planDealloc
- */
- virtual void deallocInput(ir::OperationIndex op_ind) = 0;
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IDYNAMICTENSOR_MANAGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IMEMORY_MANAGER_H__
-#define __ONERT_BACKEND_IMEMORY_MANAGER_H__
-
-namespace onert
-{
-namespace backend
-{
-
-struct IMemoryManager
-{
- virtual ~IMemoryManager() = default;
-
- virtual void allocate(void) = 0;
- virtual void deallocate(void) = 0;
-};
-
-} // namespace backend
-} // namespace onert
-
-#include <unordered_set>
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-
-using MemoryManagerSet = std::unordered_set<std::unique_ptr<backend::IMemoryManager>>;
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IMEMORY_MANAGER_H__
virtual ~IPortableTensor();
virtual const ir::Sparsity *sparsity() const { return nullptr; }
const ir::OperandInfo &get_info() const { return _info; }
+ float data_scale() const override { return _info.typeInfo().scale(); }
+ int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
+ const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
+ const std::vector<int32_t> &data_zero_points() const override
+ {
+ return _info.typeInfo().zero_points();
+ }
public:
bool has_padding() const final { return false; }
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ISTATICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_ISTATICTENSOR_MANAGER_H__
-
-#include "ITensorManager.h"
-
-namespace onert
-{
-namespace backend
-{
-
-struct IStaticTensorManager : public ITensorManager
-{
- virtual ~IStaticTensorManager() = default;
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ISTATICTENSOR_MANAGER_H__
namespace backend
{
-struct IDynamicTensorManager;
-
class ITensor
{
public:
- virtual ~ITensor() = default;
+ virtual ~ITensor();
public:
virtual uint8_t *buffer() const = 0;
virtual size_t total_size() const = 0;
- virtual size_t dimension(size_t index) const = 0;
- virtual size_t num_dimensions() const = 0;
virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
virtual ir::Layout layout() const = 0;
virtual ir::DataType data_type() const = 0;
virtual float data_scale() const = 0;
- virtual int32_t data_offset() const = 0;
+ virtual int32_t data_zero_point() const = 0;
+ virtual const std::vector<float> &data_scales() const = 0;
+ virtual const std::vector<int32_t> &data_zero_points() const = 0;
virtual bool has_padding() const = 0;
virtual void access(const std::function<void(ITensor &tensor)> &fn) = 0;
throw std::runtime_error("This backend does not support dynamic tensor");
}
+ /// @brief Dealloc the buffer (only for dynamic tensors)
+ virtual void deallocBuffer()
+ {
+ throw std::runtime_error("This backend does not support resetting buffer");
+ }
+
/**
* @brief Set the shape of the tensor to new_shape
* @note Higher dimensions are placed at the front.
* @brief Get ir::Shape of tensor
* @note Higher dimensions are placed at the front.
*/
- virtual ir::Shape getShape() const;
+ virtual ir::Shape getShape() const = 0;
virtual bool is_subtensor() const { return false; }
virtual bool needMemoryMap() const { return false; }
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ITENSOR_MANAGER_H__
-#define __ONERT_BACKEND_ITENSOR_MANAGER_H__
-
-namespace onert
-{
-namespace backend
-{
-
-// NOTE This name ITensorManager has been discussed whether or not the name is proper.
-// Anyone can argue with any better name.
-/**
- * @brief Interface as an abstract tensor manager which has MemoryManager
- * This is used as a base class for IStaticTensorManager and IDynamicTensorManager
- */
-struct ITensorManager
-{
- virtual ~ITensorManager() = default;
-};
-
-} // namespace backend
-} // namespace onert
-
-#include <unordered_set>
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-
-using TensorManagerSet = std::unordered_set<std::unique_ptr<backend::ITensorManager>>;
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ITENSOR_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file       Allocator.h
+ * @brief      This file contains Allocator related classes
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_ALLOCATOR_H__
+#define __ONERT_BACKEND_BASIC_ALLOCATOR_H__
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+/**
+ * @brief Class to allocate memory
+ */
+class Allocator
+{
+public:
+ Allocator(uint32_t capacity);
+ /**
+ * @brief Get memory base pointer
+ * @return base pointer
+ */
+ uint8_t *base() const { return _base.get(); }
+ void release() { _base.reset(); }
+
+private:
+ std::unique_ptr<uint8_t[]> _base;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_ALLOCATOR_H__
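For orientation, here is a minimal usage sketch of the new basic::Allocator. It assumes the constructor (defined in the accompanying .cc, not shown in this diff) allocates `capacity` bytes and that the header is installed as backend/basic/Allocator.h like the other public headers in this change.

#include <backend/basic/Allocator.h>

#include <cstring>

void allocator_sketch()
{
  onert::backend::basic::Allocator alloc{64}; // assumed to allocate 64 bytes on construction
  std::memset(alloc.base(), 0, 64);           // write through the raw base pointer
  alloc.release();                            // drop the allocation explicitly
}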
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_BACKEND_CONTEXT_HELPERS_H__
+#define __ONERT_BACKEND_BASIC_BACKEND_CONTEXT_HELPERS_H__
+
+#include <vector>
+
+#include "ir/Index.h"
+#include "compiler/GraphLowerInfo.h"
+#include "util/logging.h"
+#include "backend/ITensorRegistry.h"
+#include "backend/BackendContext.h"
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+// TODO Remove the template param BackendContext once unification of cpu backend context is done
+template <typename T_BackendContext> void planTensors(const T_BackendContext &ctx)
+{
+ const ir::Graph &graph = *ctx.graph();
+ const auto &order = ctx.data().op_order;
+ auto tensor_builder = ctx.tensor_builder;
+
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (ctx.external_operands().contains(ind))
+ return;
+
+ // TODO Check if we need to handle unused tensors
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any operation (No use and def)
+ const auto info = obj.info();
+ // NOTE Currently the basic backend only supports NHWC tensors.
+ // There is no way to get the layout info from the backend context for now.
+ // When we support NCHW tensors as well, we also need to change tensor info to be
+ // permuted shape.
+ assert(ctx.operand_layouts().at(ind) == ir::Layout::NHWC);
+ tensor_builder->registerTensorInfo(ind, info, ir::Layout::NHWC);
+ }
+ });
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+ // Increasing the use count here means the tensor is never deallocated during execution,
+ // i.e. it will be deallocated last.
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ for (auto &pair : def_map)
+ {
+ if (pair.second == 0)
+ tensor_builder->notifyFirstUse(pair.first);
+ }
+
+ // This is a workaround to keep these operands alive throughout the execution
+ // (they look unused because their use count is zero)
+ std::vector<ir::OperandIndex> operands_last_until_end;
+ for (auto &pair : uses_map)
+ {
+ if (pair.second == 0)
+ operands_last_until_end.push_back(pair.first);
+ }
+
+ // At each operation,
+ // 1. Scan DEF of outputs. If the output is defined here, allocate it
+ // 2. Scan DEF of inputs. If variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ for (const auto op_ind : order)
+ {
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (ctx.external_operands().contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+ // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+ // non-constant so that memory planning here can reduce memory usage
+ for (const auto &ind : op_inputs)
+ {
+ if (ctx.external_operands().contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph.operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (ctx.external_operands().contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+ // plan for deallocation of static tensor
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+
+ for (auto ind : operands_last_until_end)
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
+{
+ const ir::Graph &graph = *ctx.graph();
+ auto tensor_builder = ctx.tensor_builder;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (ctx.external_operands().contains(ind))
+ return;
+ // NOTE Assuming there are no layout changes (always assume NHWC or UNKNOWN)
+ assert(graph.layout() != ir::Layout::NCHW);
+ ir::OperandInfo backend_info{obj.shape(), obj.typeInfo(), obj.info().memAllocType(),
+ obj.isConstant()};
+ tensor_builder->registerTensorInfo(ind, backend_info, ir::Layout::NHWC);
+ });
+
+ // TODO Get compiler options from the compiler and use them rather than getting them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ basic::planTensors(ctx);
+ }
+ else
+ {
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ });
+ }
+
+ tensor_builder->allocate();
+
+ return ctx.tensor_registry.get();
+}
+
+inline void initConsts(BackendContext &ctx)
+{
+ ctx.graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (ctx.external_operands().contains(ind) || !operand.isConstant())
+ return;
+
+ auto tensor = ctx.tensor_registry->getNativeITensor(ind);
+ assert(tensor != nullptr);
+
+ VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;
+
+ auto data = operand.shareData();
+ assert(data && data->base());
+ ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);
+
+ if (ext_tensor == nullptr)
+ throw std::runtime_error{"This tensor is not external tensor"};
+
+ ext_tensor->setData(data);
+ });
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_BACKEND_CONTEXT_HELPERS_H__
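planTensors above is essentially a liveness pass: every operand starts with its remaining-use count, constants and zero-use operands are pinned until the end, and notifyFirstUse / notifyLastUse fire when an operand is defined and when its last use is consumed. The standalone sketch below (plain STL, no onert types) illustrates the counting scheme on a two-operation chain; the operand numbering is made up for illustration.

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

// Toy version of the use/def counting: operand 0 -> op A -> operand 1 -> op B -> operand 2.
int main()
{
  std::map<int, int> uses{{0, 1}, {1, 1}, {2, 0}};            // remaining uses per operand
  const std::vector<std::pair<int, int>> ops{{0, 1}, {1, 2}}; // {input, output} per operation

  std::printf("notifyFirstUse(0)\n"); // operand 0 has no def, so it is claimed up front
  for (const auto &op : ops)
  {
    std::printf("notifyFirstUse(%d)\n", op.second); // output is defined by this operation
    if (--uses[op.first] == 0)
      std::printf("notifyLastUse(%d)\n", op.first); // input is dead after this operation
  }
  std::printf("notifyLastUse(2)\n"); // zero-use output: released only at the very end
  return 0;
}

Constants follow the same pattern but receive one artificial extra use, which is what the "deallocated last" comment above refers to.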
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_DYNAMICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_BASIC_DYNAMICTENSOR_MANAGER_H__
+
+#include "MemoryManager.h"
+#include "TensorRegistry.h"
+
+#include <ir/OperandInfo.h>
+#include <ir/Operation.h>
+#include <ir/Index.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+// TODO Find optimized algorithm to manage memory.
+
+/**
+ * @brief Class to manage dynamic tensor and its memory
+ */
+class DynamicTensorManager
+{
+public:
+ DynamicTensorManager(const std::shared_ptr<TensorRegistry> ®);
+
+ virtual ~DynamicTensorManager() = default;
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout);
+
+ std::shared_ptr<DynamicMemoryManager> dynamic_mem_mgr() { return _dynamic_mem_mgr; }
+
+private:
+ const ITensor *getRawITensor(ir::OperandIndex ind);
+
+private:
+ /**
+ * @brief Memory manager for dynamic tensor.
+ * @todo DynamicMemoryManager is not optimized. Optimized one is needed
+ */
+ std::shared_ptr<DynamicMemoryManager> _dynamic_mem_mgr;
+ const std::shared_ptr<TensorRegistry> _tensors;
+
+ // Contains the indices of dynamic tensors that can be deallocated after running an operation.
+ // Note: this map could contain static tensor indices too, so careful use is required.
+ std::unordered_map<ir::OperationIndex, std::unordered_set<backend::ITensor *>>
+ _dealloc_tensor_map;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_DYNAMICTENSOR_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_IMEMORY_PLANNER_H__
+#define __ONERT_BACKEND_IMEMORY_PLANNER_H__
+
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+/**
+ * @brief Structure to have memory offset and size
+ */
+struct Block
+{
+ uint32_t offset;
+ size_t size;
+};
+
+/**
+ * @brief Interface to plan memory
+ */
+struct IMemoryPlanner
+{
+ using MemoryPlans = ir::OperandIndexMap<Block>;
+
+ /**
+ * @brief Claim memory for operand
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ virtual void claim(const ir::OperandIndex &, size_t) = 0;
+ /**
+ * @brief Release memory for operand
+ * @param[in] index The operand index
+ */
+ virtual void release(const ir::OperandIndex &) = 0;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ virtual uint32_t capacity() = 0;
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ virtual MemoryPlans &memory_plans() = 0;
+
+ virtual ~IMemoryPlanner() = default;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_IMEMORY_PLANNER_H__
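As a concrete (if wasteful) instance of this interface, a bump-style planner can satisfy claim/release/capacity by appending every claimed block and never reusing space. The sketch below is illustration only, not necessarily the planner MemoryManager actually instantiates; it assumes the onert ir headers pulled in by IMemoryPlanner.h are available.

#include "IMemoryPlanner.h"

namespace onert
{
namespace backend
{
namespace basic
{

// Illustration only: offsets grow monotonically and release() reclaims nothing.
class BumpPlannerSketch : public IMemoryPlanner
{
public:
  void claim(const ir::OperandIndex &ind, size_t size) override
  {
    _plans[ind] = Block{_capacity, size};
    _capacity += static_cast<uint32_t>(size);
  }
  void release(const ir::OperandIndex &) override { /* never reuses memory */ }
  uint32_t capacity() override { return _capacity; }
  MemoryPlans &memory_plans() override { return _plans; }

private:
  uint32_t _capacity = 0;
  MemoryPlans _plans;
};

} // namespace basic
} // namespace backend
} // namespace onert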
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_KERNEL_GENERATOR_BASE_H__
+#define __ONERT_BACKEND_BASIC_KERNEL_GENERATOR_BASE_H__
+
+#include <assert.h>
+#include <memory>
+#include <functional>
+
+#include "ir/Graph.h"
+#include "ir/OperationVisitor.h"
+#include "exec/FunctionSequence.h"
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class KernelGeneratorBase : public ir::OperationVisitor
+{
+public:
+ virtual ~KernelGeneratorBase() = default;
+ KernelGeneratorBase(const ir::Graph &graph) : _graph{graph} {}
+
+ virtual std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) = 0;
+
+protected:
+ using OperationVisitor::visit;
+
+#define OP(InternalName) \
+ void visit(const ir::operation::InternalName &) override \
+ { \
+ throw std::runtime_error("KernelGenerator: NYI for operation '" #InternalName "'"); \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+protected:
+ std::unique_ptr<exec::IFunction> releaseFunction()
+ {
+ assert(_return_fn);
+ return std::move(_return_fn);
+ }
+
+protected:
+ const ir::Graph &_graph;
+ std::unique_ptr<exec::IFunction> _return_fn;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_KERNEL_GENERATOR_BASE_H__
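The OP/visit machinery above gives every operation a throwing default, so a concrete backend only overrides the operations it supports and supplies generate(). A heavily trimmed sketch of what a derived generator could look like follows; DummyAddKernel and the BinaryArithmetic choice are hypothetical stand-ins, and real backends do more bookkeeping in generate() than shown here.

#include <backend/basic/KernelGeneratorBase.h>

namespace sketch
{

// Hypothetical kernel object; real backends build layer classes from the operation's params.
struct DummyAddKernel : public onert::exec::IFunction
{
  void run() override { /* elementwise add would go here */ }
};

class KernelGeneratorSketch : public onert::backend::basic::KernelGeneratorBase
{
public:
  using KernelGeneratorBase::KernelGeneratorBase;

  std::unique_ptr<onert::exec::FunctionSequence> generate(onert::ir::OperationIndex ind) override
  {
    auto ret = std::make_unique<onert::exec::FunctionSequence>();
    _graph.operations().at(ind).accept(*this); // dispatch to the matching visit() below
    ret->append(releaseFunction());            // collect whatever visit() stored in _return_fn
    return ret;
  }

private:
  void visit(const onert::ir::operation::BinaryArithmetic &) override
  {
    _return_fn = std::make_unique<DummyAddKernel>();
  }
};

} // namespace sketch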
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
+
+#include "Allocator.h"
+#include "IMemoryPlanner.h"
+
+namespace onert
+{
+namespace backend
+{
+
+class ITensor;
+
+namespace basic
+{
+
+class MemoryManager
+{
+public:
+ MemoryManager();
+ MemoryManager(const std::string);
+ virtual ~MemoryManager() = default;
+
+ void allocate(void);
+ uint8_t *getBuffer(const ir::OperandIndex &ind) const;
+ void deallocate(void) { _mem_alloc->release(); }
+
+ void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+ void releasePlan(const ir::OperandIndex &ind);
+
+private:
+ IMemoryPlanner *createMemoryPlanner();
+ IMemoryPlanner *createMemoryPlanner(const std::string);
+
+private:
+ ir::OperandIndexMap<Block> _tensor_mem_map;
+ std::shared_ptr<IMemoryPlanner> _mem_planner;
+ std::shared_ptr<Allocator> _mem_alloc;
+};
+
+class DynamicMemoryManager
+{
+public:
+ DynamicMemoryManager() = default;
+ virtual ~DynamicMemoryManager() = default;
+
+ std::shared_ptr<Allocator> allocate(const ITensor *tensor, uint32_t capacity);
+ void deallocate(const ITensor *tensor);
+ void deallocate(void);
+
+private:
+ std::unordered_map<const ITensor *, std::shared_ptr<Allocator>> _mem_alloc_map;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
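The managers above are two-phase: lifetimes are recorded through claimPlan/releasePlan, the planner computes offsets, and a single allocate() materializes one arena, after which getBuffer() resolves an operand to base plus its planned offset. A minimal lifecycle sketch, assuming the header is installed as backend/basic/MemoryManager.h like the other headers in this change:

#include <backend/basic/MemoryManager.h>
#include <ir/Index.h>

void memory_manager_sketch()
{
  onert::backend::basic::MemoryManager mgr;

  onert::ir::OperandIndex a{0}, b{1};
  mgr.claimPlan(a, 16); // record lifetimes first; a and b overlap here,
  mgr.claimPlan(b, 32); // so the planner must give them disjoint blocks
  mgr.releasePlan(a);
  mgr.releasePlan(b);

  mgr.allocate();                    // one arena for the whole plan
  uint8_t *a_buf = mgr.getBuffer(a); // base pointer + planned offset
  (void)a_buf;
  mgr.deallocate();
}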
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_BASIC_STATICTENSOR_MANAGER_H__
+
+#include "backend/basic/DynamicTensorManager.h"
+#include "backend/basic/MemoryManager.h"
+#include "backend/basic/TensorRegistry.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandInfo.h"
+#include "TensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class DynamicTensorManager;
+
+class StaticTensorManager
+{
+public:
+ StaticTensorManager(const std::shared_ptr<TensorRegistry> ®,
+ DynamicTensorManager *dynamic_tensor_manager);
+ virtual ~StaticTensorManager() = default;
+
+ void allocateNonconsts(void);
+ void deallocateNonconsts(void);
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout, bool as_const);
+
+ void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+ void releasePlan(const ir::OperandIndex &ind);
+
+ void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+private:
+ std::unique_ptr<MemoryManager> _nonconst_mgr;
+ const std::shared_ptr<TensorRegistry> _tensors;
+ ir::OperandIndexMap<bool> _as_constants;
+ DynamicTensorManager *_dynamic_tensor_manager;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_STATICTENSOR_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TENSOR_H__
+#define __ONERT_BACKEND_BASIC_TENSOR_H__
+
+#include "Allocator.h"
+
+#include <backend/IPortableTensor.h>
+#include <ir/OperandInfo.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class DynamicMemoryManager;
+
+class Tensor : public IPortableTensor
+{
+public:
+ Tensor() = delete;
+ virtual ~Tensor();
+
+public:
+ Tensor(const ir::OperandInfo &info, const ir::Layout layout,
+ DynamicMemoryManager *dynamic_mem_mgr)
+ : IPortableTensor(info), _layout(layout), _buffer(nullptr), _num_references(0),
+ _dynamic_mem_mgr(dynamic_mem_mgr), _allocator(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+ // Only one of the two 'setBuffer' overloads must be called, and only once
+
+ /**
+ * @brief Set the Buffer object. This method is called for static and non-const tensor
+ */
+ void setBuffer(uint8_t *buffer) { _buffer = buffer; }
+
+ /**
+ * @brief Set the Buffer object. This method is called for dynamic or const tensor
+ */
+ void setBuffer(const std::shared_ptr<Allocator> &alloc)
+ {
+ _allocator = alloc;
+ _buffer = alloc->base();
+ }
+
+ /**
+ * @brief Reset the buffer and deallocate the allocation if it is managed by itself
+ */
+ void deallocBuffer() override;
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+ /**
+ * @brief Get the total size of the tensor in bytes
+ */
+ size_t total_size() const override { return _info.total_size(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override;
+ ir::Layout layout() const override { return _layout; }
+ ir::DataType data_type() const override { return _info.typeInfo().type(); }
+ bool is_constant() const override { return _info.isConstant(); }
+ bool is_dynamic() const override { return _info.isDynamic(); }
+ void set_dynamic() override { _info.setDynamic(); }
+ bool applyShape(const ir::Shape &new_shape) override;
+ const ir::Sparsity *sparsity() const override { return _info.typeInfo().sparsity(); }
+
+ virtual void increase_ref()
+ {
+ assert(is_dynamic() ||
+ // when not dynamic
+ (_buffer != nullptr));
+
+ ++_num_references;
+ }
+
+ virtual void decrease_ref()
+ {
+ assert(_buffer != nullptr || _allocator != nullptr);
+ assert(_num_references > 0);
+ --_num_references;
+ // constant and dynamic tensors own an _allocator
+ if (_num_references == 0)
+ {
+ if (_buffer != nullptr)
+ _buffer = nullptr;
+ if (_allocator != nullptr)
+ {
+ _allocator->release();
+ _allocator = nullptr;
+ }
+ }
+ }
+
+ /**
+ * @brief Reset reference count to zero and release data
+ */
+ virtual void reset_ref()
+ {
+ assert(_buffer != nullptr || _allocator != nullptr);
+ assert(_num_references > 0);
+ _num_references = 0;
+
+ // Only constant tensor has allocator pointer
+ if (_buffer != nullptr)
+ _buffer = nullptr;
+ else
+ {
+ _allocator->release();
+ _allocator = nullptr;
+ }
+ }
+
+ virtual int32_t num_references() { return _num_references; }
+
+ void setShape(const ir::Shape &new_shape) override;
+ ir::Shape getShape() const override;
+
+protected:
+ ir::Layout _layout;
+ uint8_t *_buffer;
+ int32_t _num_references;
+ DynamicMemoryManager *_dynamic_mem_mgr;
+
+private:
+ /**
+ * @brief Memory allocator for dynamic tensor and const tensor
+ * Since maintaining both _allocator and _buffer can be confusing,
+ * this code mainly uses _buffer (not _allocator.base()) as the memory pointer.
+ * _allocator (a shared_ptr) is kept to guarantee that _buffer stays valid.
+ */
+ std::shared_ptr<Allocator> _allocator;
+};
+
+/**
+ * @brief Class that uses data from external memory that is not managed by the backend,
+ * instead of allocating and copying the data. ExternalTensor's data pointer points to
+ * an address in memory that is already allocated or mmapped elsewhere.
+ * This means ExternalTensor can take any kind of ir::Data.
+ * To support this, the tensor is assumed to have no padding, an NHWC layout,
+ * and to be constant and not dynamic.
+ */
+class ExternalTensor : public Tensor
+{
+public:
+ ExternalTensor() = delete;
+ virtual ~ExternalTensor();
+
+public:
+ ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
+ : Tensor(info, layout, nullptr)
+ {
+ assert(_layout == ir::Layout::NHWC);
+ assert(_info.isConstant());
+ assert(_info.isDynamic() == false);
+ }
+
+public:
+ /**
+ * @brief Set Data shared from outside so that this ExternalTensor will not be
+ * allocated by the backend
+ * @param[in] data data of Operand to be set
+ */
+ void setData(const std::shared_ptr<ir::Data> data)
+ {
+ assert(data != nullptr);
+ _data = data;
+ // Note: some ops such as cker::Conv could take the buffer as nullptr.
+ // That is why _buffer is also set here.
+ _buffer = const_cast<uint8_t *>(_data->base());
+ }
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+
+ bool is_constant() const override { return true; }
+ bool is_dynamic() const override { return false; }
+ void set_dynamic() override
+ {
+ throw std::runtime_error("This tensor does not support changing dynamic");
+ }
+
+ void setShape(const ir::Shape &) override
+ {
+ throw std::runtime_error("This tensor does not support changing shape");
+ }
+
+ void increase_ref() override { ++_num_references; }
+
+ void decrease_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ --_num_references;
+ if (_num_references == 0)
+ {
+ _data.reset();
+ _buffer = nullptr;
+ }
+ }
+
+ /**
+ * @brief Reset reference count to zero and release data
+ */
+ void reset_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ _num_references = 0;
+
+ _data.reset();
+ _buffer = nullptr;
+ }
+
+ int32_t num_references() override { return _num_references; }
+
+private:
+ std::shared_ptr<const ir::Data> _data;
+};
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TENSOR_H__
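To make the ownership rules above concrete, the sketch below exercises the Allocator-backed setBuffer overload (the path used for constant and dynamic tensors) and the reference counting that releases the storage when the last reference is dropped. The OperandInfo is taken as a parameter so the sketch does not depend on its constructors; passing nullptr as the dynamic memory manager is fine here because applyShape() is never called. Both headers are assumed to be installed under backend/basic/ like the others in this change.

#include <backend/basic/Tensor.h>
#include <backend/basic/Allocator.h>

#include <memory>

void tensor_refcount_sketch(const onert::ir::OperandInfo &info)
{
  using namespace onert::backend::basic;

  Tensor tensor{info, onert::ir::Layout::NHWC, nullptr};
  auto alloc = std::make_shared<Allocator>(static_cast<uint32_t>(info.total_size()));
  tensor.setBuffer(alloc); // tensor now shares ownership of the allocation

  tensor.increase_ref();
  tensor.decrease_ref(); // last reference: _buffer is cleared and the allocator released
}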
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_BASIC_TENSOR_BUILDER_H__
+
+#include <backend/basic/DynamicTensorManager.h>
+#include <backend/basic/TensorRegistry.h>
+#include <backend/basic/StaticTensorManager.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
+
+ /**
+ * @brief Register tensor information to allocate on the backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+ * @param[in] layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void allocate(void);
+
+ DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+ const std::shared_ptr<TensorRegistry> _tensor_reg;
+ std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TENSOR_BUILDER_H__
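TensorBuilder is what genTensors/planTensors above actually drive: registerTensorInfo routes an operand to the static or dynamic manager, the notify calls feed the memory plan, and allocate() finalizes static buffers, which are then reachable through the registry. A compressed sketch of that call order (the operand index and info are supplied by the caller, so no constructor details are assumed):

#include <backend/basic/TensorBuilder.h>
#include <backend/basic/TensorRegistry.h>

#include <memory>

void tensor_builder_sketch(const onert::ir::OperandIndex &ind, const onert::ir::OperandInfo &info)
{
  using namespace onert::backend::basic;

  auto reg = std::make_shared<TensorRegistry>();
  TensorBuilder builder{reg};

  builder.registerTensorInfo(ind, info, onert::ir::Layout::NHWC); // static vs. dynamic decided here
  builder.notifyFirstUse(ind); // claim in the memory plan
  builder.notifyLastUse(ind);  // release in the memory plan
  builder.allocate();          // materialize static, non-constant buffers

  auto tensor = reg->getNativeTensor(ind); // a static non-constant tensor should now have a buffer
  (void)tensor;
}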
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TENSOR_REGISTRY__
+#define __ONERT_BACKEND_BASIC_TENSOR_REGISTRY__
+
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+using TensorRegistry = PortableTensorRegistryTemplate<basic::Tensor>;
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TENSOR_REGISTRY__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file       Allocator.h
- * @brief      This file contains Allocator related classes
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
-#define __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
-
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-/**
- * @brief Class to allocate memory
- */
-class Allocator
-{
-public:
- Allocator(uint32_t capacity);
- /**
- * @brief Get memory base pointer
- * @return base pointer
- */
- uint8_t *base() const { return _base.get(); }
- void release() { _base.reset(); }
-
-private:
- std::unique_ptr<uint8_t[]> _base;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
-#define __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
-
-#include <vector>
-
-#include "ir/Index.h"
-#include "ir/OpSequences.h"
-#include "ir/LowerInfoMap.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-// TODO Remove the template param BackendContext once unification of cpu backend context is done
-template <typename T_BackendContext>
-void planTensors(const T_BackendContext &ctx, const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
-{
- auto graph = ctx.graph();
- auto tensor_builder = ctx.tensor_builder;
-
- ir::OperandIndexMap<uint32_t> uses_map;
- ir::OperandIndexMap<uint32_t> def_map;
- ir::OperandIndexSequence constants;
-
- auto model_io =
- (graph->getInputs() + graph->getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
-
- // Prepare scanning
- for (auto ind : ctx.operand_list())
- {
- if (model_io.contains(ind))
- continue;
- const auto &obj = graph->operands().at(ind);
- const auto &li = lower_info.operand.at(ind);
- if (li->def_factors().getOnlyElement().backend() != ctx.backend())
- continue;
-
- // Ignore unused tensor
- if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
- {
- VERBOSE_F() << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
- return;
- }
-
- uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
- if (obj.isConstant())
- constants.append(ind);
-
- auto factor = li->def_factors().getOnlyElement();
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any op_seq (No use and def)
- const auto info = obj.info();
- const auto backend_layout = factor.layout();
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout);
- }
- }
-
- // Start scanning to do notify{First|Last}Use for each tensor
-
- // If a tensor is a constant, increase the use of the tensor and allocate it first.
- // Increasing the use count here ensures the tensor is never deallocated early, i.e. it will be
- // deallocated last.
- for (const auto &ind : constants)
- {
- uses_map[ind]++;
- tensor_builder->notifyFirstUse(ind);
- }
-
- // At each operation,
- // 1. Scan DEF of outputs. If the tensor is defined here, allocate it
- // 2. Scan DEF of inputs. If variable tensor, allocate it
- // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_seq_ind : order)
- {
- const auto &op_seq = op_seqs.at(op_seq_ind);
- for (const auto &op_idx : op_seq.operations())
- {
- auto op_inputs = graph->operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED;
- auto op_outputs = graph->operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED;
-
- // Define outputs
- for (const auto &ind : op_outputs)
- {
- if (model_io.contains(ind))
- continue;
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- // Scan variable tensors
- // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
- // non-constant so that the memory planning here uses less memory
- for (const auto &ind : op_inputs)
- {
- if (model_io.contains(ind))
- continue;
- if (!tensor_builder->isRegistered(ind))
- continue;
- const auto &operand = graph->operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
- lower_info.operand.at(ind)->use_factors().size() == 1);
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- for (const auto &ind : op_inputs)
- {
- if (model_io.contains(ind))
- continue;
- if (!tensor_builder->isRegistered(ind))
- continue;
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensor
- tensor_builder->notifyLastUse(ind);
-
- // plan for deallocation of dynamic tensor
- auto dyn_tensor_manager = tensor_builder->dynamicTensorManager();
- auto *tensor = ctx.tensor_registry->getITensor(ind);
- assert(tensor);
- dyn_tensor_manager->planDealloc(op_idx, tensor);
- }
- }
- }
- }
-
- // Dispose and validate
- for (const auto &ind : constants)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder->notifyLastUse(ind);
- }
- }
-
- assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
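
The planTensors helper above is essentially a liveness pass driven by use/def counts: outputs are claimed on their first definition, and inputs are released when their remaining use count reaches zero. A minimal standalone sketch of that bookkeeping, using plain integers and hypothetical names instead of onert types:

#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

int main()
{
  // Hypothetical graph: tensor id -> number of remaining uses.
  std::map<int, uint32_t> uses_map{{0, 2}, {1, 1}, {2, 1}};

  // Operations in topological order; each entry lists input and output tensor ids.
  struct Op
  {
    std::vector<int> inputs, outputs;
  };
  std::vector<Op> order;
  order.push_back({{0}, {1}});
  order.push_back({{0, 1}, {2}});
  order.push_back({{2}, {}});

  for (const auto &op : order)
  {
    // "Define" outputs: the first time a tensor is produced, claim its memory.
    for (int out : op.outputs)
      std::cout << "notifyFirstUse(" << out << ")\n";

    // Consume inputs: when the last use is seen, the tensor can be released.
    for (int in : op.inputs)
      if (--uses_map[in] == 0)
        std::cout << "notifyLastUse(" << in << ")\n";
  }
}
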
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
-
-#include "TensorRegistry.h"
-
-#include "ConstantInitializerBase.h"
-#include <ir/Operands.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class ConstantInitializer : public ConstantInitializerBase
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg);
-
-public:
- void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
-
- // TODO: For now, only the cpu backend supports constant tensors that use external data.
- // If other backends need this as well (ExternalTensor would have to be abstracted,
- // e.g. as an IExternal interface), this could become part of
- // cpu_common::ConstantInitializerBase
- void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
-#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
-
-#include <unordered_map>
-#include <functional>
-
-#include "ir/Coordinates.h"
-#include "ir/Layout.h"
-#include "ir/Operand.h"
-#include "ir/Operands.h"
-#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
-#include "backend/ITensorRegistry.h"
-#include "util/logging.h"
-#include "backend/ITensorRegistry.h"
-
-namespace
-{
-template <typename T>
-static void Init(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj, const bool copy,
- const onert::ir::Layout frontend_layout = onert::ir::Layout::UNKNOWN)
-{
- const auto shape = model_obj.shape();
- assert(model_obj.data());
- auto base = reinterpret_cast<const T *>(model_obj.data()->base());
-
- obj.access([&](::onert::backend::ITensor &tensor) {
- switch (shape.rank())
- {
- case 0:
- {
- assert(model_obj.data()->size() == sizeof(T));
- const auto value = *reinterpret_cast<const T *>(base);
- T *into = reinterpret_cast<T *>(tensor.buffer());
- *into = value;
- break;
- }
- case 1:
- {
- auto vec_size = shape.dim(0);
- for (int32_t n = 0; n < vec_size; ++n)
- {
- const T *from = reinterpret_cast<const T *>(base) + n;
- const auto value = *from;
-
- T *into = reinterpret_cast<T *>(tensor.buffer()) + n;
-
- *into = value;
- }
- break;
- }
- case 2:
- {
- const int32_t copy_len = shape.dim(1);
-
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- ::onert::ir::Coordinates coords{i, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords), base + i * copy_len,
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t width = shape.dim(1);
- const int32_t copy_len = shape.dim(2);
-
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- for (auto j = 0; j < shape.dim(1); ++j)
- {
- ::onert::ir::Coordinates coords{i, j, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords),
- base + i * width * copy_len + j * copy_len, copy_len * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- const int32_t height = shape.dim(1);
- const int32_t width = shape.dim(2);
- const int32_t copy_len = shape.dim(3);
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- for (auto j = 0; j < shape.dim(1); ++j)
- {
- for (auto k = 0; k < shape.dim(2); ++k)
- {
- if (copy)
- {
- ::onert::ir::Coordinates coords{i, j, k, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords),
- base + i * height * width * copy_len + j * width * copy_len + k * copy_len,
- copy_len * sizeof(T));
- }
- else
- {
- for (auto l = 0; l < shape.dim(3); ++l)
- {
- const auto coords = ::onert::ir::convertCoordinates({i, j, k, l}, frontend_layout,
- tensor.layout());
- T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
- T value = *(base + i * height * width * copy_len + j * width * copy_len +
- k * copy_len + l);
- *into = value;
- }
- }
- }
- }
- }
- break;
- }
- default:
- throw std::runtime_error{"Not yet supported"};
- }
- });
-}
-
-template <typename T>
-void copyInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
-{
- Init<T>(model_obj, obj, true);
-}
-
-template <typename T>
-void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj,
- const onert::ir::Layout frontend_layout)
-{
- const bool copy = frontend_layout == obj.layout();
- Init<T>(model_obj, obj, copy, frontend_layout);
-}
-
-} // namespace
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class ConstantInitializerBase : public ir::OperationVisitor
-{
-public:
- virtual ~ConstantInitializerBase() = default;
-
-public:
- void run()
- {
- assert(tensor_registry());
- for (const auto &it : _init_map)
- {
- const auto &ind = it.first;
- const auto &fn = it.second;
-
- const auto &model_obj = _operands.at(ind);
- auto tensor_obj = tensor_registry()->getNativeITensor(ind);
- assert(tensor_obj != nullptr);
- fn(model_obj, *tensor_obj);
- VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl;
- }
- _init_map.clear();
- }
-
-public:
- ConstantInitializerBase(const ir::Operands &operands)
- : _operands{operands}, _current_layout{ir::Layout::UNKNOWN}
- {
- }
-
-public:
- using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>;
-
- void setLayout(ir::Layout layout) { _current_layout = layout; }
-
-protected:
- virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0;
-
-public:
- virtual void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
- {
- registerPermuteInitializer(index, obj); // as default
- }
-
-public:
- void registerCopyInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
- void registerPermuteInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
-
-public:
- void registerCustomInitializer(const ir::OperandIndex &index, const ir::Operand &obj,
- void (*customInit)(const onert::ir::Operand &model_obj,
- onert::backend::ITensor &obj))
- {
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- using namespace std::placeholders;
- _init_map[index] = std::bind(customInit, _1, _2);
- }
-
-public:
- bool exist(const ir::OperandIndex &ind) { return _init_map.find(ind) != _init_map.end(); }
-
-protected:
- const ir::Operands &_operands;
- std::unordered_map<ir::OperandIndex, Initializer> _init_map;
- ir::Layout _current_layout;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
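
In the rank-4 branch of Init<T> above, the non-copy path rewrites each element through a layout-aware coordinate conversion before computing the destination offset. A standalone sketch of that idea, assuming an NHWC source and an NCHW destination (the offset helpers are illustrative, not onert's convertCoordinates):

#include <array>
#include <cstddef>
#include <iostream>
#include <vector>

// Flatten NHWC coordinates {n, h, w, c} into a linear offset. dims = {N, H, W, C}.
static size_t offsetNHWC(std::array<int, 4> d, int n, int h, int w, int c)
{
  return ((static_cast<size_t>(n) * d[1] + h) * d[2] + w) * d[3] + c;
}

// Flatten the same logical coordinates into an NCHW buffer.
static size_t offsetNCHW(std::array<int, 4> d, int n, int h, int w, int c)
{
  return ((static_cast<size_t>(n) * d[3] + c) * d[1] + h) * d[2] + w;
}

int main()
{
  std::array<int, 4> dims{1, 2, 2, 3}; // N, H, W, C
  std::vector<float> src(12), dst(12);
  for (size_t i = 0; i < src.size(); ++i)
    src[i] = static_cast<float>(i);

  // Element-wise permuting copy, analogous to the rank-4 "else" branch above.
  for (int h = 0; h < dims[1]; ++h)
    for (int w = 0; w < dims[2]; ++w)
      for (int c = 0; c < dims[3]; ++c)
        dst[offsetNCHW(dims, 0, h, w, c)] = src[offsetNHWC(dims, 0, h, w, c)];

  std::cout << "dst[0..3]: " << dst[0] << ' ' << dst[1] << ' ' << dst[2] << ' ' << dst[3] << '\n';
}
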
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_DYNAMICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CPU_COMMON_DYNAMICTENSOR_MANAGER_H__
-
-#include "MemoryManager.h"
-#include "TensorRegistry.h"
-
-#include <backend/IDynamicTensorManager.h>
-#include <ir/OperandInfo.h>
-#include <ir/Operation.h>
-#include <ir/Index.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-// TODO Find optimized algorithm to manage memory.
-
-/**
- * @brief Class to manage dynamic tensor and its memory
- */
-class DynamicTensorManager : public backend::IDynamicTensorManager
-{
-public:
- DynamicTensorManager(const std::shared_ptr<TensorRegistry> ®);
-
- virtual ~DynamicTensorManager() = default;
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout);
-
- void planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor) override;
- void deallocInput(ir::OperationIndex op_ind) override;
-
- std::shared_ptr<DynamicMemoryManager> dynamic_mem_mgr() { return _dynamic_mem_mgr; }
-
-private:
- const ITensor *getRawITensor(ir::OperandIndex ind);
-
-private:
- /**
- * @brief Memory manager for dynamic tensor.
- * @todo DynamicMemoryManager is not optimized. Optimized one is needed
- */
- std::shared_ptr<DynamicMemoryManager> _dynamic_mem_mgr;
- const std::shared_ptr<TensorRegistry> _tensors;
-
- // contains list of dynamic tensor index, which can be deallocated after running operation
- // note: this map could contain static tensor index too. Careful use is required.
- std::unordered_map<ir::OperationIndex, std::unordered_set<backend::ITensor *>>
- _dealloc_tensor_map;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_DYNAMICTENSOR_MANAGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IMEMORY_PLANNER_H__
-#define __ONERT_BACKEND_IMEMORY_PLANNER_H__
-
-#include "ir/OperandIndexMap.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-/**
- * @brief Structure to have memory offset and size
- */
-struct Block
-{
- uint32_t offset;
- size_t size;
-};
-
-/**
- * @brief Interface to plan memory
- */
-struct IMemoryPlanner
-{
- using MemoryPlans = ir::OperandIndexMap<Block>;
-
- /**
- * @brief Claim memory for operand
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- virtual void claim(const ir::OperandIndex &, size_t) = 0;
- /**
- * @brief Release memory for operand
- * @param[in] index The operand index
- */
- virtual void release(const ir::OperandIndex &) = 0;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- virtual uint32_t capacity() = 0;
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- virtual MemoryPlans &memory_plans() = 0;
-
- virtual ~IMemoryPlanner() = default;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IMEMORY_PLANNER_H__
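
The IMemoryPlanner interface above only fixes the claim/release/capacity contract; concrete planners decide how offsets are packed. A minimal self-contained sketch in the spirit of a bump allocator (simplified, not the actual onert planner):

#include <cstdint>
#include <iostream>
#include <unordered_map>

struct Block
{
  uint32_t offset;
  size_t size;
};

// Simplest possible planner: every claim gets the next free offset, releases are ignored.
class BumpPlanner
{
public:
  void claim(int index, size_t size)
  {
    _plans[index] = Block{_capacity, size};
    _capacity += static_cast<uint32_t>(size);
  }
  void release(int /*index*/) { /* a bump planner never reuses memory */ }
  uint32_t capacity() const { return _capacity; }
  const std::unordered_map<int, Block> &memory_plans() const { return _plans; }

private:
  uint32_t _capacity = 0;
  std::unordered_map<int, Block> _plans;
};

int main()
{
  BumpPlanner planner;
  planner.claim(0, 16);
  planner.claim(1, 64);
  planner.release(0);
  planner.claim(2, 8);
  std::cout << "total capacity: " << planner.capacity() << '\n'; // 88
  for (const auto &p : planner.memory_plans())
    std::cout << "tensor " << p.first << " -> offset " << p.second.offset << '\n';
}
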
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
-#define __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
-
-#include <assert.h>
-#include <memory>
-#include <functional>
-
-#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
-#include <memory>
-#include "exec/FunctionSequence.h"
-#include "backend/ITensorRegistry.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class KernelGeneratorBase : public ir::OperationVisitor
-{
-public:
- virtual ~KernelGeneratorBase() = default;
-
- std::unique_ptr<exec::IFunction> releaseFunction()
- {
- assert(_return_fn);
- return std::move(_return_fn);
- }
-
- std::unique_ptr<exec::FunctionSequence> generate(const ir::OpSequence &op_seq)
- {
- op_seq.accept(*this);
- return std::move(_return_fn_seq);
- }
-
-protected:
- using OperationVisitor::visit;
-
- void visit(const ir::OpSequence &) override
- {
- throw std::runtime_error("KernelGenerator: NYI for operation 'OpSequence'");
- }
-
-#define OP(InternalName) \
- void visit(const ir::operation::InternalName &) override \
- { \
- throw std::runtime_error("KernelGenerator: NYI for operation '" #InternalName "'"); \
- }
-#include "ir/Operations.lst"
-#undef OP
-
-protected:
- std::unique_ptr<exec::IFunction> _return_fn;
- std::unique_ptr<exec::FunctionSequence> _return_fn_seq; // TODO Extract this out
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
-#define __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
-
-#include "Allocator.h"
-#include "backend/IMemoryManager.h"
-#include "IMemoryPlanner.h"
-
-namespace onert
-{
-namespace backend
-{
-
-class ITensor;
-
-namespace cpu_common
-{
-
-class MemoryManager : public backend::IMemoryManager
-{
-public:
- MemoryManager();
- MemoryManager(const std::string);
- virtual ~MemoryManager() = default;
-
- void allocate(void) override;
- uint8_t *getBuffer(const ir::OperandIndex &ind) const;
- void deallocate(void) override { _mem_alloc->release(); }
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
-private:
- IMemoryPlanner *createMemoryPlanner();
- IMemoryPlanner *createMemoryPlanner(const std::string);
-
-private:
- ir::OperandIndexMap<Block> _tensor_mem_map;
- std::shared_ptr<IMemoryPlanner> _mem_planner;
- std::shared_ptr<Allocator> _mem_alloc;
-};
-
-class DynamicMemoryManager
-{
-public:
- DynamicMemoryManager() = default;
- virtual ~DynamicMemoryManager() = default;
-
- std::shared_ptr<Allocator> allocate(const ITensor *tensor, uint32_t capacity);
- void deallocate(const ITensor *tensor);
- void deallocate(void);
-
-private:
- std::unordered_map<const ITensor *, std::shared_ptr<Allocator>> _mem_alloc_map;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
-
-#include "backend/IStaticTensorManager.h"
-#include "backend/cpu_common/DynamicTensorManager.h"
-#include "backend/cpu_common/MemoryManager.h"
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorManager.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandInfo.h"
-#include "TensorRegistry.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class DynamicTensorManager;
-
-class StaticTensorManager : public backend::IStaticTensorManager
-{
-public:
- StaticTensorManager(const std::shared_ptr<TensorRegistry> ®,
- DynamicTensorManager *dynamic_tensor_manager);
- virtual ~StaticTensorManager() = default;
-
- void allocateNonconsts(void);
- void deallocateNonconsts(void);
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout, bool as_const);
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-private:
- std::unique_ptr<MemoryManager> _nonconst_mgr;
- const std::shared_ptr<TensorRegistry> _tensors;
- ir::OperandIndexMap<bool> _as_constants;
- DynamicTensorManager *_dynamic_tensor_manager;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_TENSOR_H__
-#define __ONERT_BACKEND_CPU_COMMON_TENSOR_H__
-
-#include "Allocator.h"
-
-#include <backend/IPortableTensor.h>
-#include <ir/OperandInfo.h>
-#include <ir/Data.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class DynamicMemoryManager;
-
-class Tensor : public IPortableTensor
-{
-public:
- Tensor() = delete;
- virtual ~Tensor();
-
-public:
- Tensor(const ir::OperandInfo &info, const ir::Layout layout,
- DynamicMemoryManager *dynamic_mem_mgr)
- : IPortableTensor(info), _layout(layout), _buffer(nullptr), _num_references(0),
- _dynamic_mem_mgr(dynamic_mem_mgr), _allocator(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- // Exactly one of the two 'setBuffer' overloads must be called, and only once
-
- /**
- * @brief Set the Buffer object. This method is called for static, non-const tensors
- */
- void setBuffer(uint8_t *buffer)
- {
- assert(_buffer == nullptr);
- _buffer = buffer;
- }
-
- /**
- * @brief Set the Buffer object. This method is called for dynamic or const tensors
- */
- void setBuffer(const std::shared_ptr<Allocator> &alloc)
- {
- assert(_buffer == nullptr);
- _allocator = alloc;
- _buffer = alloc->base();
- }
-
- // This works just like setBuffer but simply overwrites the existing Allocator without a nullptr check
- void overwriteBuffer(const std::shared_ptr<Allocator> &alloc)
- {
- _allocator = alloc;
- _buffer = alloc->base();
- }
-
- /**
- * @brief Mark that this tensor no longer owns memory.
- * The actual deallocation must be done by the caller.
- */
- void resetBuffer()
- {
- _allocator.reset();
- _buffer = nullptr;
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
- /**
- * @brief Get dimension by index
- *
- * @param index Index of the dimension to get
- * @return size_t Dimension at index
- * @note N : dimension(0)
- * H : dimension(1)
- * W : dimension(2)
- * C : dimension(3)
- */
- size_t dimension(size_t index) const final override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t total_size() const override { return _info.total_size(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override { return _layout; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
- bool is_constant() const override { return _info.isConstant(); }
- bool is_dynamic() const override { return _info.isDynamic(); }
- void set_dynamic() override { _info.setDynamic(); }
- bool applyShape(const ir::Shape &new_shape) override;
- const ir::Sparsity *sparsity() const override { return _info.typeInfo().sparsity(); }
-
- virtual void increase_ref()
- {
- assert(is_dynamic() ||
- // when not dynamic
- (_buffer != nullptr));
-
- ++_num_references;
- }
-
- virtual void decrease_ref()
- {
- assert(_buffer != nullptr || _allocator != nullptr);
- assert(_num_references > 0);
- --_num_references;
- // constant tensors and dynamic tensors have _allocator
- if (_num_references == 0)
- {
- if (_buffer != nullptr)
- _buffer = nullptr;
- if (_allocator != nullptr)
- {
- _allocator->release();
- _allocator = nullptr;
- }
- }
- }
-
- /**
- * @brief Reset reference count to zero and release data
- */
- virtual void reset_ref()
- {
- assert(_buffer != nullptr || _allocator != nullptr);
- assert(_num_references > 0);
- _num_references = 0;
-
- // Only constant tensor has allocator pointer
- if (_buffer != nullptr)
- _buffer = nullptr;
- else
- {
- _allocator->release();
- _allocator = nullptr;
- }
- }
-
- virtual int32_t num_references() { return _num_references; }
-
- void setShape(const ir::Shape &new_shape) override;
-
-protected:
- ir::Layout _layout;
- uint8_t *_buffer;
- int32_t _num_references;
- DynamicMemoryManager *_dynamic_mem_mgr;
-
-private:
- /**
- * @brief Memory allocator for dynamic and const tensors
- * Since maintaining both _allocator and _buffer would be confusing,
- * this code mainly uses _buffer (not _allocator.base()) as the memory pointer.
- * _allocator (a shared_ptr) is kept only to guarantee that _buffer stays valid.
- */
- std::shared_ptr<Allocator> _allocator;
-};
-
-/**
- * @brief Class that uses data from external memory not managed by a backend,
- * instead of allocating and copying the data. ExternalTensor's data pointer points to
- * memory that is already allocated, or to an mmapped area,
- * which means ExternalTensor can wrap any kind of ir::Data.
- * To support this, the following is assumed: no padding, always NHWC layout,
- * constant tensor, and not dynamic.
- */
-class ExternalTensor : public Tensor
-{
-public:
- ExternalTensor() = delete;
- virtual ~ExternalTensor();
-
-public:
- ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
- : Tensor(info, layout, nullptr)
- {
- assert(_layout == ir::Layout::NHWC);
- assert(_info.isConstant());
- assert(_info.isDynamic() == false);
- }
-
-public:
- /**
- * @brief set Data to be shared from external so that this ExternalTensor will not be
- * allocated on CPU backend
- * @param[in] data data of Operand to be set
- */
- void setData(const std::shared_ptr<ir::Data> data)
- {
- assert(data != nullptr);
- _data = data;
- // Note: some ops such as cker::Conv may receive a nullptr buffer.
- // That is why _buffer is also kept up to date here
- _buffer = const_cast<uint8_t *>(_data->base());
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
-
- bool is_constant() const override { return true; }
- bool is_dynamic() const override { return false; }
- void set_dynamic() override
- {
- throw std::runtime_error("This tensor does not support changing dynamic");
- }
-
- void setShape(const ir::Shape &) override
- {
- throw std::runtime_error("This tensor does not support changing shape");
- }
-
- void increase_ref() override { ++_num_references; }
-
- void decrease_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- --_num_references;
- if (_num_references == 0)
- {
- _data.reset();
- _buffer = nullptr;
- }
- }
-
- /**
- * @brief Reset reference count to zero and release data
- */
- void reset_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- _num_references = 0;
-
- _data.reset();
- _buffer = nullptr;
- }
-
- int32_t num_references() override { return _num_references; }
-
-private:
- std::shared_ptr<const ir::Data> _data;
-};
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_TENSOR_H__
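
Tensor's increase_ref/decrease_ref above implements manual reference counting in which the backing allocation is dropped when the count reaches zero. A standalone sketch of the same pattern (hypothetical RefCountedBuffer type, not onert's Tensor):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>

class RefCountedBuffer
{
public:
  explicit RefCountedBuffer(size_t size) : _storage(new uint8_t[size]), _buffer(_storage.get()) {}

  void increase_ref() { ++_num_references; }

  void decrease_ref()
  {
    assert(_num_references > 0);
    if (--_num_references == 0)
    {
      // Last user gone: drop the allocation, mirroring Tensor::decrease_ref above.
      _storage.reset();
      _buffer = nullptr;
    }
  }

  uint8_t *buffer() const { return _buffer; }

private:
  std::unique_ptr<uint8_t[]> _storage;
  uint8_t *_buffer = nullptr;
  int32_t _num_references = 0;
};

int main()
{
  RefCountedBuffer buf(128);
  buf.increase_ref();
  buf.increase_ref();
  buf.decrease_ref();
  std::cout << "still allocated: " << std::boolalpha << (buf.buffer() != nullptr) << '\n'; // true
  buf.decrease_ref();
  std::cout << "still allocated: " << std::boolalpha << (buf.buffer() != nullptr) << '\n'; // false
}
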
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_TENSOR_REGISTRY__
-#define __ONERT_BACKEND_CPU_COMMON_TENSOR_REGISTRY__
-
-#include "backend/ITensorRegistry.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-using TensorRegistry = PortableTensorRegistryTemplate<cpu_common::Tensor>;
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_TENSOR_REGISTRY__
#include "ir/Operands.h"
#include "backend/Backend.h"
-#include "backend/controlflow/Backend.h"
+#include "backend/builtin/Backend.h"
namespace onert
{
public:
backend::Backend *get(const std::string &key);
const backend::Backend *get(const std::string &key) const;
- const backend::controlflow::Backend *getControlflow() const;
+ const backend::builtin::Backend *getBuiltin() const;
const std::vector<const backend::Backend *> getAll() const
{
std::vector<const backend::Backend *> v;
private:
std::map<std::string, std::unique_ptr<void, dlhandle_destroy_t>> _handle_map;
std::map<std::string, std::unique_ptr<backend::Backend, backend_destroy_t>> _gen_map;
- backend::controlflow::Backend *_controlflow{nullptr};
+ backend::builtin::Backend *_builtin{nullptr};
/**
- * @brief load controlflow backend
+ * @brief load builtin backend
*
* @param backend backend to be loaded
*
* @return
*/
- void loadControlflowBackend();
+ void loadBuiltinBackend();
};
} // namespace compiler
#define __ONERT_COMPILER_CODE_MAP_H__
#include <unordered_map>
+#include "ir/Index.h"
+#include "ir/Operation.h"
+#include "exec/FunctionSequence.h"
+#include "OperationLowerInfo.h"
namespace onert
{
struct CodeAndInfo
{
- const ir::OpSequence *op_seq;
- const ir::operation::LowerInfo *lower_info;
+ ir::OperationIndex op_ind;
+ const ir::Operation *op;
+ const OperationLowerInfo *lower_info;
std::unique_ptr<exec::FunctionSequence> fn_seq;
- CodeAndInfo(const ir::OpSequence *op_seq, const ir::operation::LowerInfo *lower_info,
+ CodeAndInfo(const ir::OperationIndex op_ind, const ir::Operation *op,
+ const OperationLowerInfo *lower_info,
std::unique_ptr<exec::FunctionSequence> &&fn_seq)
- : op_seq{op_seq}, lower_info{lower_info}, fn_seq{std::move(fn_seq)}
+ : op_ind{op_ind}, op{op}, lower_info{lower_info}, fn_seq{std::move(fn_seq)}
{
}
};
-using CodeMap = std::unordered_map<ir::OpSequenceIndex, CodeAndInfo>;
+using CodeMap = std::unordered_map<ir::OperationIndex, CodeAndInfo>;
} // namespace compiler
} // namespace onert
// OPTIONS ONLY FOR DEBUGGING/PROFILING
std::string trace_filepath; //< File path to save trace records
int graph_dump_level; //< Graph dump level, values between 0 and 2 are valid
- int op_seq_max_node; //< Number of nodes that can be
std::string executor; //< Executor name to use
ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
bool he_scheduler; //< HEScheduler if true, ManualScheduler otherwise
State state(void) const { return _state; }
- /**
- * @brief Check if model can compile
- * @return @c true if model can compile, otherwise @c false
- * @note This method don't check model correctness,\n
- * so model verification should be done before calling this method
- */
- bool checkCompilable();
CompilerOptions &options() { return _options; }
/**
#include <memory>
-#include "ir/operation/LowerInfo.h"
-#include "ir/OpSequence.h"
+#include "ir/Index.h"
#include "exec/FunctionSequence.h"
#include "CodeMap.h"
class ExecutionBuilder
{
public:
- void append(const ir::OpSequenceIndex index, CodeAndInfo &&code_and_info)
+ void append(const ir::OperationIndex index, CodeAndInfo &&code_and_info)
{
_code_map.emplace(index, std::move(code_and_info));
}
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_GRAPH_LOWER_INFO_H__
+#define __ONERT_COMPILER_GRAPH_LOWER_INFO_H__
+
+#include <memory>
+#include <unordered_map>
+
+#include "compiler/OperandLowerInfo.h"
+#include "compiler/OperationLowerInfo.h"
+#include "util/ObjectManager.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+struct GraphLowerInfo
+{
+ util::ObjectManager<ir::OperationIndex, OperationLowerInfo> operation;
+ util::ObjectManager<ir::OperandIndex, OperandLowerInfo> operand;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_GRAPH_LOWER_INFO_H__
* limitations under the License.
*/
-#ifndef __ONERT_IR_LOWERED_GRAPH_H__
-#define __ONERT_IR_LOWERED_GRAPH_H__
+#ifndef __ONERT_COMPILER_LOWERED_GRAPH_H__
+#define __ONERT_COMPILER_LOWERED_GRAPH_H__
#include "ir/Graph.h"
-#include "ir/LowerInfoMap.h"
-#include "ir/OpSequences.h"
+#include "compiler/GraphLowerInfo.h"
#include "compiler/BackendResolver.h"
#include "compiler/Compiler.h"
ir::Graph &graph() { return _graph; }
const ir::Graph &graph() const { return _graph; }
- const ir::LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
- const ir::operation::LowerInfo *getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const;
- void setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
- std::unique_ptr<ir::operation::LowerInfo> &&lower_info);
- void removeLowerInfo(const ir::OpSequenceIndex &op_seq_index);
- const ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index) const;
- ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index);
- void setLowerInfo(const ir::OperandIndex &index,
- std::unique_ptr<ir::operand::LowerInfo> &&lower_info);
- void removeLowerInfo(const ir::OperandIndex &index);
- ir::OpSequences &op_seqs() { return _op_seqs; }
- const ir::OpSequences &op_seqs() const { return _op_seqs; }
- void iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const;
- void
- iterateTopolOpSeqs(const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn);
- const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
- const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
+ const compiler::GraphLowerInfo &lower_info() const { return _lower_info_map; }
+ compiler::GraphLowerInfo &lower_info() { return _lower_info_map; }
std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
-private:
- void
- makeOpSequences(ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- const compiler::CompilerOptions &options,
- const compiler::BackendResolver &backend_resolver);
+ void setHasDynamicTensor(ir::OperationIndex ind, bool val)
+ {
+ _has_dynamic_tensor_map.emplace(ind, val);
+ }
+ bool getHasDynamicTensor(ir::OperationIndex ind) const
+ {
+ auto itr = _has_dynamic_tensor_map.find(ind);
+ return (itr == _has_dynamic_tensor_map.end()) ? false : itr->second;
+ }
- void manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info);
+private:
+ void makeLowerInfo(const compiler::BackendResolver &backend_resolver);
void dumpLowerInfo();
- bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
- ir::Layout layout, const compiler::BackendResolver &backend_resolver);
- ir::OpSequenceIndex appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
- const ir::Operation &node);
private:
ir::Graph _graph;
- backend::BackendContexts _backend_contexts;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
- ir::LowerInfoMap _lower_info_map;
- // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member
- ir::OpSequences _op_seqs;
+ compiler::GraphLowerInfo _lower_info_map;
+ ir::OperationIndexMap<bool> _has_dynamic_tensor_map;
};
} // namespace compiler
} // namespace onert
-#endif // __ONERT_IR_LOWERED_GRAPH_H__
+#endif // __ONERT_COMPILER_LOWERED_GRAPH_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_OPERAND_LOWER_INFO_H__
+#define __ONERT_COMPILER_OPERAND_LOWER_INFO_H__
+
+#include <functional>
+#include <stdint.h>
+
+#include "compiler/PermuteFactor.h"
+#include "util/Set.h"
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+using PermuteFactorSet = util::Set<PermuteFactor>;
+
+class OperandLowerInfo
+{
+public:
+ OperandLowerInfo()
+ {
+ // DO NOTHING
+ }
+
+public:
+ const PermuteFactorSet &def_factors(void) const { return _def_factors; }
+ const PermuteFactorSet &use_factors(void) const { return _use_factors; }
+
+public:
+ void addDefPermuteFactor(const PermuteFactor &factor) { _def_factors.add(factor); }
+ void addUsePermuteFactor(const PermuteFactor &factor) { _use_factors.add(factor); }
+ void removeDefPermuteFactor(const PermuteFactor &factor) { _def_factors.remove(factor); }
+ void removeUsePermuteFactor(const PermuteFactor &factor) { _use_factors.remove(factor); }
+
+private:
+ PermuteFactorSet _def_factors;
+ PermuteFactorSet _use_factors;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_OPERAND_LOWER_INFO_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_OPERATION_LOWER_INFO_H__
+#define __ONERT_COMPILER_OPERATION_LOWER_INFO_H__
+
+#include <string>
+
+#include <compiler/PermuteFactor.h>
+#include <ir/Layout.h>
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+class OperationLowerInfo
+{
+public:
+ OperationLowerInfo(const backend::Backend *backend, ir::Layout layout);
+ const backend::Backend *backend() const { return _permute_factor.backend(); }
+ ir::Layout layout() const { return _permute_factor.layout(); }
+
+private:
+ PermuteFactor _permute_factor;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_OPERATION_LOWER_INFO_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file PermuteFactor.h
+ * @brief This file contains PermuteFactor class
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ONERT_COMPILER_OPERAND_PERMUTE_FACTOR_H__
+#define __ONERT_COMPILER_OPERAND_PERMUTE_FACTOR_H__
+
+#include <functional>
+
+#include "ir/Layout.h"
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class that has factors of permutation
+ */
+class PermuteFactor
+{
+public:
+ /**
+ * @brief Construct PermuteFactor object.
+ * @param backend The backend factor
+ * @param layout The layout factor
+ */
+ PermuteFactor(const backend::Backend *backend, ir::Layout layout)
+ : _backend{backend}, _layout{layout}
+ {
+ // DO NOTHING
+ }
+ /**
+ * @brief Construct PermuteFactor object by copy semantics.
+ */
+ PermuteFactor(const PermuteFactor &f) : _backend{f._backend}, _layout{f._layout}
+ {
+ // DO NOTHING
+ }
+ /**
+ * @brief Construct PermuteFactor object by move semantics.
+ */
+ PermuteFactor(PermuteFactor &&) = default;
+
+public:
+ /**
+ * @brief Get backend
+ *
+ * @return Backend factor
+ */
+ const backend::Backend *backend() const { return _backend; }
+ /**
+ * @brief Get layout
+ *
+ * @return Layout factor
+ */
+ ir::Layout layout() const { return _layout; }
+
+public:
+ /**
+ * @brief operator overloading function for `==`
+ *
+ * @return Whether the two PermuteFactors are the same
+ */
+ bool operator==(const PermuteFactor &other) const
+ {
+ return _backend == other.backend() && _layout == other.layout();
+ }
+ /**
+ * @brief operator overloading function for `!=`
+ *
+ * @return Whether the two PermuteFactors are different
+ */
+ bool operator!=(const PermuteFactor &other) const { return !(*this == other); }
+
+private:
+ const backend::Backend *_backend{nullptr};
+ ir::Layout _layout{ir::Layout::UNKNOWN};
+};
+
+} // namespace compiler
+} // namespace onert
+
+namespace std
+{
+
+/**
+ * @brief Structure that provides hash value of PermuteFactor
+ */
+template <> struct hash<onert::compiler::PermuteFactor>
+{
+ size_t operator()(const onert::compiler::PermuteFactor &factor) const noexcept
+ {
+ hash<const onert::backend::Backend *> b_hash{};
+ hash<onert::ir::Layout> l_hash{};
+ return b_hash(factor.backend()) ^ (l_hash(factor.layout()) << 1);
+ }
+};
+
+} // namespace std
+
+std::ostream &operator<<(std::ostream &os, const onert::compiler::PermuteFactor &obj);
+
+#endif // __ONERT_COMPILER_OPERAND_PERMUTE_FACTOR_H__
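
The std::hash specialization above combines the backend and layout hashes with an xor/shift so that PermuteFactor can key unordered containers such as PermuteFactorSet. A standalone sketch of the same pattern with plain fields (hypothetical Factor type standing in for the real class):

#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>

struct Factor
{
  std::string backend; // stands in for const backend::Backend *
  int layout;          // stands in for ir::Layout

  bool operator==(const Factor &other) const
  {
    return backend == other.backend && layout == other.layout;
  }
};

namespace std
{
template <> struct hash<Factor>
{
  size_t operator()(const Factor &f) const noexcept
  {
    // Same combination scheme as the PermuteFactor hash above: h1 ^ (h2 << 1).
    return hash<string>{}(f.backend) ^ (hash<int>{}(f.layout) << 1);
  }
};
} // namespace std

int main()
{
  std::unordered_set<Factor> factors;
  factors.insert({"cpu", 0});
  factors.insert({"cpu", 0}); // duplicate, ignored
  factors.insert({"acl_neon", 1});
  std::cout << "unique factors: " << factors.size() << '\n'; // 2
}
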
#define __ONERT_COMPILER_STATIC_SHAPE_INFERER_H__
#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
#include "compiler/LoweredGraph.h"
#include "ir/Index.h"
{
public:
StaticShapeInferer(
- const ir::SubgraphIndex &subg_idx,
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &lowered_subgs)
- : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
- _operations(lowered_subgs.at(subg_idx)->graph().operations()),
- _return_has_dynamic_tensor(false)
+ const ir::SubgraphIndex &subg_idx,
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &lowered_subgs)
+ : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
+ _operations(lowered_subgs.at(subg_idx)->graph().operations()),
+ _return_has_dynamic_tensor(false)
{ /* empty */
}
virtual ~StaticShapeInferer() = default;
 * @brief Infer shape of operands belonging to ops and set the output shape.
* If output shape cannot be known without running op, mark it so that it can be allocated
* when running kernel.
- * @param op_seq sequence of operations
- * @return @c true if op_seq's input or output has any dynamic tensor; @c false otherwise.
+ * @param op Operation
+ * @return @c true if op's input or output has any dynamic tensor; @c false otherwise.
*/
- bool infer(const ir::OpSequence &op_seq);
+ bool infer(const ir::Operation &op);
void dump();
private:
+ void inferSubgraph(ir::SubgraphIndex subg_ind);
bool checkDynamicInput(const ir::Operation &op);
void setDynamicOutput(const ir::Operation &op);
private:
const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &_lowered_subgs;
+ &_lowered_subgs;
// _operands and _operations can be changed by controlflow operation
ir::Operands &_operands; // operands of current subgraph
ir::Operations &_operations; // operations of current subgraph
#include "ir/Operands.h"
#include "ir/OperationVisitor.h"
#include "ir/Index.h"
-#include "backend/IDynamicTensorManager.h"
-#include "backend/ITensorManager.h"
#include "backend/ITensorRegistry.h"
#include <map>
public:
DynamicShapeInferer(const ir::Operands &operands,
const std::shared_ptr<backend::ITensorRegistry> &tensor_registry)
- : _operands(operands), _tensor_registry(tensor_registry)
+ : _operands(operands), _tensor_registry(tensor_registry)
{
UNUSED_RELEASE(_operands);
UNUSED_RELEASE(_tensor_registry);
*/
void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
+ // In case of an op's output tensor, it is possible that
+ // the output has become dynamic although it had been static before.
+ // Once a tensor becomes dynamic, it loses the memory that was allocated for the static case.
+ // Therefore, once an output is dynamic, it should be treated as a dynamic tensor (memory should be
+ // allocated at runtime). Here `previously` means that `dynamic` or `static` was set in a previous
+ // iteration of a WHILE loop or in a previous call of `nnfw_run()`
+ bool previously_static(backend::ITensor *op_output) { return !op_output->is_dynamic(); }
+
+ // Helper function that checks if the op's input is static.
+ // Note that the input of the n-th op has been set to static or dynamic by the (n-1)-th op,
+ // which is why it is called `currently_static`
+ bool currently_static(backend::ITensor *op_input) { return !op_input->is_dynamic(); }
+
private:
/**
* @brief To get operand-level info, e.g., ir::Operand::isConstant()
#include "exec/DynamicShapeInferer.h"
#include "ir/Operations.h"
#include "backend/ITensorRegistry.h"
-#include "backend/IDynamicTensorManager.h"
namespace onert
{
public: // methods related to dynamic tensor
struct DynamicTensorCtx
{
- const ir::OpSequence *op_seq = nullptr;
+ ir::OperationIndex op_ind;
const ir::Operations *operations = nullptr;
std::shared_ptr<exec::DynamicShapeInferer> dynamic_shape_inferer = nullptr;
- backend::IDynamicTensorManager *dynamic_tensor_manager = nullptr;
};
/**
namespace backend
{
class IPortableTensor;
-namespace controlflow
+namespace builtin
{
class IOTensor;
}
-}
-}
+} // namespace backend
+} // namespace onert
namespace onert
{
namespace exec
*
* @return Vector of @c IOTensor
*/
- virtual const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const = 0;
+ virtual const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const = 0;
};
using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>;
InputDesc(void) = delete;
InputDesc(const ir::OperandInfo &info, const void *buffer, const size_t size, ir::Layout layout)
- : info(info), buffer(buffer), size(size), layout(layout)
+ : info(info), buffer(buffer), size(size), layout(layout)
{
}
};
OutputDesc(void) = delete;
OutputDesc(const ir::OperandInfo &info, void *buffer, const size_t size, ir::Layout layout)
- : info(info), buffer(buffer), size(size), layout(layout)
+ : info(info), buffer(buffer), size(size), layout(layout)
{
}
};
public:
MMapedData(int fd, const std::ptrdiff_t mmap_offset, const size_t mmap_size,
const std::ptrdiff_t data_offset, const size_t data_size)
- : ExternalData(nullptr, data_size),
- _mmap_base(
- static_cast<uint8_t *>(mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, mmap_offset))),
- _mmap_size(mmap_size), _offset(data_offset - mmap_offset)
+ : ExternalData(nullptr, data_size),
+ _mmap_base(
+ static_cast<uint8_t *>(mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, mmap_offset))),
+ _mmap_size(mmap_size), _offset(data_offset - mmap_offset)
{
// DO NOTHING
}
#include "ir/Operands.h"
#include "ir/Operations.h"
-#include "ir/OpSequence.h"
-#include "ir/OpSequences.h"
#include "ir/Subgraphs.h"
namespace onert
// Graph Building
public:
OperandIndex addOperand(const Shape &shape, const TypeInfo &type);
+ /**
+ * @brief Add an operand to the graph with the given index and object
+ *
+ * If the given index is available, it succeeds. And @c operand is moved which invalidates the
+ * caller's pointer. If the given index is already taken, it fails. And @c operand will not be
+ * moved, so the caller's pointer will still be valid.
+ *
+ * @param[in] index Index to be added
+ * @param[in] operand Operand to be added
+ * @return OperandIndex @c index if successful, Undefined otherwise
+ */
+ OperandIndex addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand);
OperationIndex addOperation(std::unique_ptr<Operation> &&node);
+ /**
+ * @brief Add an operation to the graph with the given index and object
+ *
+ * If the given index is available, it succeeds. And @c operation is moved which invalidates the
+ * caller's pointer. If the given index is already taken, it fails. And @c operation will not be
+ * moved, so the caller's pointer will still be valid.
+ *
+ * @param index Index to be added
+ * @param operation Operation to be added
+ * @return OperationIndex @c index if successful, Undefined otherwise
+ */
+ OperationIndex addOperation(OperationIndex index, std::unique_ptr<Operation> &&operation);
void setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data);
void addInput(const OperandIndex &ind, const std::string &name = "");
void addOutput(const OperandIndex &ind, const std::string &name = "");
- void finishBuilding(void);
+ void verify(void);
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
- bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; }
void setLayout(Layout layout) { _layout = layout; }
void setSubgraphs(const std::shared_ptr<Subgraphs> &subgs) { _subgraphs = subgs; }
private:
+ bool checkOperandsForOperation(const Operation &operation);
+ void linkOperandToOperation(OperationIndex index, const Operation &operation);
void initializeUseDef();
+ // TODO Rename to `sweepUnusedOperands`
+ // TODO Make this public
void sweepGarbageOperands();
// Custom operations support
std::shared_ptr<Subgraphs> &subgraphs() { return _subgraphs; }
Layout layout() const { return _layout; }
+ // Topological sort
+public:
+ std::vector<ir::OperationIndex> topolSortOperations() const;
+
private:
- Phase _phase{Phase::BUILDING};
Operations _operations;
Operands _operands;
OperandIndexSequence _inputs;
#include "util/Index.h"
+#include <ostream>
+
namespace onert
{
namespace ir
struct IOIndexTag;
using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>;
-struct OpSequenceIndexTag;
-using OpSequenceIndex = ::onert::util::Index<uint32_t, OpSequenceIndexTag>;
-
struct SubgraphIndexTag;
using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>;
+template <typename IndexType>
+std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index)
+{
+ if (index.undefined())
+ return o << prefix << std::string("?");
+ else
+ return o << prefix << index.value();
+}
+
+inline std::ostream &operator<<(std::ostream &o, const OperationIndex &i)
+{
+ return _index_print_impl(o, "@", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const OperandIndex &i)
+{
+ return _index_print_impl(o, "%", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const IOIndex &i)
+{
+ return _index_print_impl(o, "IO", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const SubgraphIndex &i)
+{
+ return _index_print_impl(o, "SUBGRAPH", i); // $ubgraph
+}
+
} // namespace ir
} // namespace onert
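
The new operator<< overloads above give each index type a distinct textual prefix ('@' for operations, '%' for operands) and print '?' when the index is undefined. A standalone sketch of the same printing scheme (simplified index type, not onert's util::Index):

#include <cstdint>
#include <iostream>
#include <limits>
#include <string>

// Simplified stand-in for onert::util::Index: the max value means "undefined".
struct SimpleIndex
{
  uint32_t value;
  bool undefined() const { return value == std::numeric_limits<uint32_t>::max(); }
};

static std::ostream &print_index(std::ostream &o, const std::string &prefix, SimpleIndex index)
{
  if (index.undefined())
    return o << prefix << "?";
  return o << prefix << index.value;
}

int main()
{
  SimpleIndex op{3};
  SimpleIndex operand{7};
  SimpleIndex unknown{std::numeric_limits<uint32_t>::max()};

  print_index(std::cout, "@", op) << '\n';      // @3
  print_index(std::cout, "%", operand) << '\n'; // %7
  print_index(std::cout, "@", unknown) << '\n'; // @?
}
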
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_LOWER_INFO_MAP_H__
-#define __ONERT_IR_LOWER_INFO_MAP_H__
-
-#include <memory>
-#include <unordered_map>
-
-#include "ir/operand/LowerInfo.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/Index.h"
-
-namespace onert
-{
-namespace ir
-{
-
-struct LowerInfoMap
-{
- std::unordered_map<OpSequenceIndex, std::unique_ptr<operation::LowerInfo>> op_seq;
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operand;
-};
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_LOWER_INFO_MAP_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OP_SEQUENCE_H__
-#define __ONERT_IR_OP_SEQUENCE_H__
-
-#include <vector>
-#include <string>
-#include <memory>
-
-#include "ir/Layout.h"
-#include "ir/Index.h"
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-
-class Operations;
-
-class OpSequence
-{
-public:
- explicit OpSequence(Layout layout);
- OpSequence(const OpSequence &) = delete;
-
-public:
- void accept(OperationVisitor &v) const;
-
-public:
- const OperandIndexSequence &getInputs() const { return _inputs; }
- const OperandIndexSequence &getOutputs() const { return _outputs; }
- void setInputs(const OperandIndexSequence &indexes) { _inputs = indexes; }
- void setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; }
- void replaceInputs(const OperandIndex &from, const OperandIndex &to)
- {
- _inputs.replace(from, to);
- }
- void replaceOutputs(const OperandIndex &from, const OperandIndex &to)
- {
- _outputs.replace(from, to);
- }
-
- void appendOperation(const OperationIndex &index) { _operations.emplace_back(index); }
-
- std::vector<OperationIndex> &operations(void) { return _operations; }
-
- const std::vector<OperationIndex> &operations(void) const { return _operations; }
-
- uint32_t size(void) const { return _operations.size(); }
-
-public:
- void remove(const OperationIndex &index);
-
- bool exist(const OperationIndex &index) const;
-
-public:
- Layout getLayout() const { return _layout; }
-
-public:
- std::vector<OperationIndex>::const_iterator begin() const { return _operations.begin(); }
- std::vector<OperationIndex>::const_iterator end() const { return _operations.end(); }
-
-public:
- /**
- * @brief Set @c true if any operation in this opSequence has dynamic input
- * or dynamic output;
- * @c false if all operations' inputs and outputs are static tensors
- */
- void has_dynamic_tensor(bool has_dynamic_tensor) { _has_dynamic_tensor = has_dynamic_tensor; }
- bool has_dynamic_tensor() const { return _has_dynamic_tensor; }
-
-private:
- OperandIndexSequence _inputs;
- OperandIndexSequence _outputs;
- std::vector<OperationIndex> _operations;
-
-private:
- Layout _layout;
- bool _has_dynamic_tensor;
-};
-
-std::string getStrFromOpSeq(const OpSequence &op_seq, const Operations &operations);
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OP_SEQUENCE_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OP_SEQUENCES_H__
-#define __ONERT_IR_OP_SEQUENCES_H__
-
-#include "ir/Index.h"
-#include "ir/OpSequence.h"
-#include "util/ObjectManager.h"
-
-namespace onert
-{
-namespace ir
-{
-
-/**
- * @brief Class that manages OpSequence objects
- */
-class OpSequences : public util::ObjectManager<OpSequenceIndex, OpSequence>
-{
-public:
- /**
- * @brief Create an instance of OpSequence with given op and push it to objects
- *
- * @param[in] op_idx Operation index that is emplaced
- * @param[in] layout OpSequence's layout
- * @return OpSequenceIndex
- */
- OpSequenceIndex emplace(const OperationIndex &op_index, Layout layout);
-
- /**
- * @brief Push an instance of OpSequence to objects
- *
- * @param[in] op_seq An instance of OpSequence
- * @return OpSequenceIndex
- */
- OpSequenceIndex emplace(std::unique_ptr<OpSequence> &&op_seq);
- /**
- * @brief Check if an operation does exist in any OpSequences
- *
- * @param operation_index Operation index to find
- * @return true If such operation exists in any OpSequences otherwise false
- */
- bool containsOperation(const OperationIndex &operation_index) const;
- /**
- * @brief Find an operation from all OpSequences
- *
- * @param operation_index Operation index to find
- * @return OpSequenceIndex Index of OpSequence that contains given operation index
- */
- OpSequenceIndex getOperation(const OperationIndex &operation_index) const;
- /**
- * @brief Remove an operation from OpSequence
- *
- * @param operation_index Operation index to be removed
- */
- void removeFromOpSequence(const OperationIndex &operation_index);
-
-private:
- void cacheSequenceIndex(const OpSequenceIndex &seq_index, const OperationIndex &op_index) const;
- OpSequenceIndex *findSequenceIndex(const OperationIndex &operation_index) const;
-
- OpSequenceIndex findOperation(const OperationIndex &operation_index) const;
- mutable std::unordered_map<OperationIndex, OpSequenceIndex> _seq_indexes;
-};
-
-/**
- * @brief Dump OpSequences
- *
- * @param op_seqs Operation Sequences
- * @param operations Operation context
- */
-void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations);
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OP_SEQUENCES_H__
{
public:
explicit Operand(const Shape &shape, const TypeInfo &type)
- : _info{shape, type, MemAllocType::STATIC}
+ : _info{shape, type, MemAllocType::STATIC}
{
// DO NOTHING
}
void removeUse(const OperationIndex &idx);
void setDef(const OperationIndex &idx);
void unsetDef();
+ void clearDefUse();
public:
void type(const DataType type) { _info.type(type); };
public:
OperandIndexSequence operator+(const OperandIndexSequence &other) const;
- friend std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &op_seq);
+ friend std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &operand_seq);
public:
std::vector<OperandIndex>::const_iterator begin(void) const { return _vec.begin(); }
*/
OperandInfo(const Shape &shape, const TypeInfo &typeInfo, MemAllocType alloc_type,
bool is_const = false, bool is_variable = false)
- : _shape(shape), _typeInfo(typeInfo), _alloc_type(alloc_type), _const(is_const),
- _variable(is_variable)
+ : _shape(shape), _typeInfo(typeInfo), _alloc_type(alloc_type), _const(is_const),
+ _variable(is_variable)
{
// DO NOTHING
}
#define __ONERT_IR_OPERATION_VISITOR_H__
#include "ir/Operations.Include.h"
-#include "ir/OpSequence.h"
namespace onert
{
virtual void visit(const operation::InternalName &) {}
#include "ir/Operations.lst"
#undef OP
-
- // This OpSequence node should be handled specially so that
- // Op.lst doesn't have OpSequence
- // TODO Remove by pushing it down to derived classes.
- virtual void visit(const OpSequence &)
- {
- throw std::runtime_error{
- "OperationVisitor: This does not privide visit function in OpSequence"};
- }
};
} // namespace ir
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/
+ */
#ifndef __ONERT_IR_SHAPE_H__
#define __ONERT_IR_SHAPE_H__
* @param[in] width The width value
*/
FeatureShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
- : N{batch}, C{depth}, H{height}, W{width}
+ : N{batch}, C{depth}, H{height}, W{width}
{
// DO NOTHING
}
return rank() == 0 ? 1 : _dimensions.at(i);
}
+ // TODO Fix different behavior with const version
int32_t &dim(int i) { return _dimensions.at(i); }
/**
Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout);
/**
-* @brief Find out if tha rank in this shape is "maybe" unspecified.
-* Note that when rank == 0, shape could represent scalar or unspecified rank
-* \see https://developer.android.com/ndk/reference/struct/a-neural-networks-operand-type
-*/
+ * @brief Find out if the rank in this shape is "maybe" unspecified.
+ * Note that when rank == 0, shape could represent scalar or unspecified rank
+ * \see https://developer.android.com/ndk/reference/struct/a-neural-networks-operand-type
+ */
inline bool rankMaybeUnspecified(const ir::Shape &shape) { return (shape.rank() == 0); }
} // namespace ir
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/
+ */
#ifndef __ONERT_IR_SPARSITY_H__
#define __ONERT_IR_SPARSITY_H__
Sparsity() = default;
Sparsity(std::vector<uint16_t> &&w1_segments, std::vector<uint16_t> &&w1_indices,
std::vector<int32_t> &&block_size)
- : _w1_segments(w1_segments), _w1_indices(w1_indices), _block_size(block_size)
+ : _w1_segments(w1_segments), _w1_indices(w1_indices), _block_size(block_size)
{
}
#ifndef __ONERT_IR_TYPEINFO_H__
#define __ONERT_IR_TYPEINFO_H__
+#include <cassert>
#include <cstdint>
#include <memory>
#include <vector>
namespace ir
{
+struct Quantization
+{
+ std::vector<float> scales;
+ std::vector<int32_t> zero_points;
+};
+
class TypeInfo
{
public:
TypeInfo() = delete;
- explicit TypeInfo(DataType type, float scale = 0, int32_t offset = 0)
- : _type(type), _scale(scale), _offset(offset), _sparsity(nullptr)
+ explicit TypeInfo(DataType type) : _type{type}, _sparsity{nullptr} {}
+
+ TypeInfo(DataType type, float scale, int32_t zero_point) : _type{type}, _sparsity{nullptr}
{
+ quantization(scale, zero_point);
}
public:
DataType type() const { return _type; }
- float scale() const { return _scale; }
- int32_t offset() const { return _offset; }
+ float scale() const
+ {
+ assert(_quant.scales.size() == 1);
+ return _quant.scales[0];
+ }
+ const std::vector<float> &scales() const { return _quant.scales; }
+ int32_t zero_point() const
+ {
+ assert(_quant.zero_points.size() == 1);
+ return _quant.zero_points[0];
+ }
+ const std::vector<int32_t> &zero_points() const { return _quant.zero_points; }
const ir::Sparsity *sparsity() const { return _sparsity.get(); }
+ void quantization(float scale, int32_t zero_point)
+ {
+ _quant.scales.resize(1);
+ _quant.scales[0] = scale;
+ _quant.zero_points.resize(1);
+ _quant.zero_points[0] = zero_point;
+ }
+ void quantization(std::vector<float> &&scales, std::vector<int32_t> &&zero_points)
+ {
+ _quant.scales = scales;
+ _quant.zero_points = zero_points;
+ }
void sparsity(std::shared_ptr<ir::Sparsity> sparsity) { _sparsity = sparsity; }
public:
private:
DataType _type;
- // for quantization
- float _scale;
- int32_t _offset;
- // for sparsity
+ ir::Quantization _quant;
std::shared_ptr<ir::Sparsity> _sparsity;
};
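A small sketch of the reworked quantization interface: per-tensor parameters keep the scalar accessors, while per-channel parameters go through the vector overloads. The DataType enumerators below are assumptions; any quantized type is handled the same way.

#include <gtest/gtest.h>
#include "ir/TypeInfo.h"

TEST(TypeInfo, quantization_sketch)
{
  using onert::ir::DataType;
  using onert::ir::TypeInfo;

  // Per-tensor quantization: the scalar accessors are valid
  TypeInfo per_tensor{DataType::QUANT_UINT8_ASYMM, 0.5f, 128};
  EXPECT_FLOAT_EQ(per_tensor.scale(), 0.5f);
  EXPECT_EQ(per_tensor.zero_point(), 128);

  // Per-channel quantization: use the vector setter/getters; calling scale() or
  // zero_point() here would trip the single-element assert
  TypeInfo per_channel{DataType::QUANT_INT8_SYMM};
  per_channel.quantization({0.1f, 0.2f}, {0, 0});
  EXPECT_EQ(per_channel.scales().size(), 2u);
  EXPECT_EQ(per_channel.zero_points().size(), 2u);
}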
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERAND_LOWER_INFO_H__
-#define __ONERT_IR_OPERAND_LOWER_INFO_H__
-
-#include <functional>
-#include <stdint.h>
-
-#include "ir/operand/PermuteFactor.h"
-#include "util/Set.h"
-
-namespace onert
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-namespace operand
-{
-using PermuteFactorSet = util::Set<PermuteFactor>;
-
-class LowerInfo
-{
-public:
- LowerInfo()
- {
- // DO NOTHING
- }
-
-public:
- const PermuteFactorSet &def_factors(void) const { return _def_factors; }
- const PermuteFactorSet &use_factors(void) const { return _use_factors; }
-
-public:
- void addDefPermuteFactor(const PermuteFactor &factor) { _def_factors.add(factor); }
- void addUsePermuteFactor(const PermuteFactor &factor) { _use_factors.add(factor); }
- void removeDefPermuteFactor(const PermuteFactor &factor) { _def_factors.remove(factor); }
- void removeUsePermuteFactor(const PermuteFactor &factor) { _use_factors.remove(factor); }
-
-private:
- PermuteFactorSet _def_factors;
- PermuteFactorSet _use_factors;
-};
-
-} // namespace operand
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERAND_LOWER_INFO_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file PermuteFactor.h
- * @brief This file contains onert::ir::operand::PermuteFactor class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
-#define __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
-
-#include <functional>
-
-#include "ir/Layout.h"
-
-namespace onert
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-namespace operand
-{
-
-/**
- * @brief Class that has factors of permutation
- */
-class PermuteFactor
-{
-public:
- /**
- * @brief Construct PermuteFactor object.
- * @param backend The backend factor
- * @param layout The layout factor
- */
- PermuteFactor(const backend::Backend *backend, Layout layout) : _backend{backend}, _layout{layout}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct PermuteFactor object by copy semantics.
- */
- PermuteFactor(const PermuteFactor &f) : _backend{f._backend}, _layout{f._layout}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct PermuteFactor object by move semantics.
- */
- PermuteFactor(PermuteFactor &&) = default;
-
-public:
- /**
- * @brief Get backend
- *
- * @return Backend factor
- */
- const backend::Backend *backend() const { return _backend; }
- /**
- * @brief Get layout
- *
- * @return Layout factor
- */
- Layout layout() const { return _layout; }
-
-public:
- /**
- * @brief operator overloading function for `==`
- *
- * @return Whether two PermuteFactor are the same
- */
- bool operator==(const PermuteFactor &other) const
- {
- return _backend == other.backend() && _layout == other.layout();
- }
- /**
- * @brief operator overloading function for `!=`
- *
- * @return Whether two PermuteFactor are differenct
- */
- bool operator!=(const PermuteFactor &other) const { return !(*this == other); }
-
-private:
- const backend::Backend *_backend{nullptr};
- Layout _layout{Layout::UNKNOWN};
-};
-
-} // namespace operand
-} // namespace ir
-} // namespace onert
-
-namespace std
-{
-
-/**
- * @brief Structure that provides hash value of PermuteFactor
- */
-template <> struct hash<onert::ir::operand::PermuteFactor>
-{
- size_t operator()(const onert::ir::operand::PermuteFactor &factor) const noexcept
- {
- hash<const onert::backend::Backend *> b_hash{};
- hash<onert::ir::Layout> l_hash{};
- return b_hash(factor.backend()) ^ (l_hash(factor.layout()) << 1);
- }
-};
-
-} // namespace std
-
-#endif // __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOWER_INFO_H__
-#define __ONERT_IR_OPERATION_LOWER_INFO_H__
-
-#include <string>
-
-#include <ir/operand/PermuteFactor.h>
-
-namespace onert
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LowerInfo
-{
-public:
- LowerInfo(const backend::Backend *backend, Layout layout);
- const backend::Backend *backend() const { return _permute_factor.backend(); }
- Layout layout() const { return _permute_factor.layout(); }
-
-private:
- operand::PermuteFactor _permute_factor;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOWER_INFO_H__
CONFIG(NCNN_LAYOUT , std::string , "NCHW")
CONFIG(PROFILING_MODE , bool , "0")
CONFIG(USE_SCHEDULER , bool , "0")
-CONFIG(OP_SEQ_MAX_NODE , int , "0")
CONFIG(TRACE_FILEPATH , std::string , "")
CONFIG(FP16_ENABLE , bool , "0")
CONFIG(RUY_THREADS , int , "-1")
CONFIG(OP_BACKEND_ ## InternalName, std::string, "")
#include "ir/Operations.lst"
#undef OP
-
{
public:
InsufficientBufferSizeException(const std::string &msg)
- : OnertException{"InsufficientBufferSize", msg}
+ : OnertException{"InsufficientBufferSize", msg}
{
}
};
{
const auto end_time = std::chrono::steady_clock::now();
_timer_res =
- std::chrono::duration_cast<std::chrono::microseconds>(end_time - _start_time).count();
+ std::chrono::duration_cast<std::chrono::microseconds>(end_time - _start_time).count();
};
private:
*/
T value() const { return _index; }
- friend std::ostream &operator<<(std::ostream &o, const Index &t)
- {
- if (t.undefined())
- return o << std::string("undefined");
- else
- return o << t.value();
- }
-
private:
T _index;
};
#include <memory>
+#include "util/logging.h"
+
namespace onert
{
namespace util
template <typename Index, typename Object> class ObjectManager
{
public:
- ObjectManager() : _index_count{0u} {}
+ ObjectManager() : _next_index{0u} {}
public:
/**
- * @brief Create an object with args and put it in the container with a new Index for that
+ * @brief Create an object with args and put it in the container with a newly assigned @c Index
*
* @param[in] args Arguments for creating Operand object
- * @return Created index that is associated to the object
+ * @return Created index associated with the object if successful, an Undefined index otherwise
*/
template <class... Args> Index emplace(Args &&... args)
{
auto index = generateIndex();
+ if (!index.valid())
+ return index;
_objects.emplace(index, std::make_unique<Object>(std::forward<Args>(args)...));
return index;
}
/**
- * @brief Put object in the container with a new Index for that
+ * @brief Put the object in the container with the given index.
+ *
+ * It fails when the given index is already taken or @c index is Undefined.
*
* @param[in] object Object to be pushed
- * @return Created index that is associated to the object
+ * @param[in] index Index associated with the object
+ * @return @c index if successful, an Undefined index otherwise
+ */
+ Index push(std::unique_ptr<Object> &&object, Index index)
+ {
+ auto gen_index = tryIndex(index);
+ if (gen_index.valid())
+ _objects.emplace(gen_index, std::move(object));
+ return gen_index;
+ }
+ /**
+ * @brief Put the object in the container with a newly assigned index.
+ *
+ * It fails when it cannot generate a valid index.
+ *
+ * @param[in] object Object to be pushed
+ * @return The newly assigned index if successful, an Undefined index otherwise
*/
Index push(std::unique_ptr<Object> &&object)
{
- auto index = generateIndex();
- _objects.emplace(index, std::move(object));
+ auto gen_index = generateIndex();
+ if (gen_index.valid())
+ _objects.emplace(gen_index, std::move(object));
+ return gen_index;
+ }
+ /**
+ * @brief Set the object in the container with the given index.
+ *
+ * If the index is Undefined, it will fail.
+ * If the index is already taken, it will overwrite the content.
+ *
+ * @param[in] object Object to be pushed
+ * @param[in] index Index associated with the object
+ * @return @c index if successful, an Undefined index otherwise
+ */
+ Index set(Index index, std::unique_ptr<Object> &&object)
+ {
+ if (index.valid())
+ _objects[index] = std::move(object);
return index;
}
-
/**
* @brief Remove the object that is associated with the given index
*
/**
* @brief Get the object that is associated with the given index
*
+ * If such an object does not exist, it will throw @c std::out_of_range
+ *
* @param[in] index Index of the object to be returned
* @return Object
*/
/**
* @brief Get the object that is associated with the given index
*
+ * If such an object does not exist, it will throw @c std::out_of_range
+ *
* @param[in] index Index of the object to be returned
* @return Object
*/
Object &at(const Index &index) { return *(_objects.at(index)); }
+ /**
+ * @brief Get the object that is associated with the given index
+ *
+ * If such an object does not exist, it will return `nullptr`
+ *
+ * @param[in] index Index of the object to be returned
+ * @return Object
+ */
+ const Object *getRawPtr(const Index &index) const
+ {
+ auto itr = _objects.find(index);
+ if (itr == _objects.end())
+ return nullptr;
+ else
+ {
+ assert(itr->second != nullptr);
+ return itr->second.get();
+ }
+ }
+ /**
+ * @brief Get the object that is associated with the given index
+ *
+ * If such an object does not exist, it will return `nullptr`
+ *
+ * @param[in] index Index of the object to be returned
+ * @return Object The found object
+ */
+ Object *getRawPtr(const Index &index)
+ {
+ return const_cast<Object *>(
+ const_cast<const ObjectManager<Index, Object> *>(this)->getRawPtr(index));
+ }
/**
* @brief Get the object that is associated with the given index
*
auto it = _objects.find(index);
return it != _objects.end();
}
+ /**
+ * @brief Return the number of objects that the manager contains
+ *
+ * @return size_t Number of objects
+ */
+ size_t size() const { return _objects.size(); }
/**
* @brief Iterate over the container with given function
*
}
private:
- Index generateIndex() { return Index{_index_count++}; }
+ // Try assigning the given index
+ Index tryIndex(Index index)
+ {
+ if (!index.valid())
+ return index;
+ if (_objects.find(index) == _objects.end())
+ {
+ // If the given index does not exist, update the next index and return the index
+ if (index.value() >= _next_index)
+ _next_index = index.value() + 1;
+ return index;
+ }
+ else
+ {
+ // If the given index exists already, return a non-valid index
+ return Index{};
+ }
+ }
+
+ // Generate a new index with `_next_index`
+ Index generateIndex()
+ {
+ // No need to check if there is an entry with _next_index since
+ // _next_index is always ("the highest index in the object map" + 1)
+ if (Index{_next_index}.valid())
+ return Index{_next_index++};
+ else
+ return Index{};
+ }
protected:
std::unordered_map<Index, std::unique_ptr<Object>> _objects;
- uint32_t _index_count;
+ uint32_t _next_index;
};
} // namespace util
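A compact sketch of the new index semantics (auto-assigned emplace, explicit-index push, overwrite via set, and nullptr from getRawPtr for unknown indices). The Payload struct is invented purely for illustration.

#include <gtest/gtest.h>
#include <memory>
#include "ir/Index.h"
#include "util/ObjectManager.h"

TEST(ObjectManager, index_semantics_sketch)
{
  struct Payload
  {
    int v;
    explicit Payload(int x) : v{x} {}
  };
  onert::util::ObjectManager<onert::ir::OperandIndex, Payload> mgr;

  // emplace() assigns indices starting from 0
  auto i0 = mgr.emplace(10);
  EXPECT_EQ(mgr.at(i0).v, 10);

  // push() with an explicit index succeeds only when the slot is free
  auto i5 = mgr.push(std::make_unique<Payload>(50), onert::ir::OperandIndex{5});
  EXPECT_TRUE(i5.valid());
  EXPECT_FALSE(mgr.push(std::make_unique<Payload>(51), onert::ir::OperandIndex{5}).valid());

  // set() overwrites an existing slot; getRawPtr() returns nullptr for unknown indices
  mgr.set(i5, std::make_unique<Payload>(55));
  EXPECT_EQ(mgr.at(i5).v, 55);
  EXPECT_EQ(mgr.getRawPtr(onert::ir::OperandIndex{100}), nullptr);
  EXPECT_EQ(mgr.size(), 2u);
}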
#include "ir/Index.h"
#include "ir/Layout.h"
#include "ir/OperationVisitor.h"
-#include "backend/IDynamicTensorManager.h"
#include "backend/ITensor.h"
#include "backend/ITensorRegistry.h"
ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &input_true_shape,
const ir::Shape &input_false_shape);
-ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf,
- const int32_t *sizes_buf);
+template <typename T>
+ir::Shape inferSliceShape(const ir::Shape &input_shape, const T *begins_buf, const T *sizes_buf);
ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape &block_shape_shape,
const ir::Shape &padding_shape, const int32_t *block_shape_buf,
uint32_t getSessionId() const { return _session_id; }
+ /**
+ * @brief Return true if more than one session exists
+ *
+ * @note This method is NOT thread-safe. Call it only in a thread-safe situation.
+ */
+ bool hasMultipleSessions() const { return _next_session_id > 1; }
+
/**
* @brief Set subgraph index of a graph
*/
{
std::unique_lock<std::mutex> lock{_session_id_mutex};
- static uint32_t next_session_id = 0;
- _session_id = next_session_id++;
+ _session_id = _next_session_id++;
}
private:
std::unordered_map<const ir::Graph *, ir::SubgraphIndex> _subgraph_indices;
uint32_t _session_id;
static std::mutex _session_id_mutex;
+ static uint32_t _next_session_id;
};
} // namespace util
#define __ONERT_UTIL_LOGGING_H__
#include <iostream>
+#include <cstring>
#include "util/ConfigSource.h"
static Context &ctx = Context::get();
+inline std::string decorated_name(const char *input)
+{
+ const int min_prefix = 16;
+ std::string prefix(input);
+ auto len_prefix = prefix.size();
+ if (len_prefix > min_prefix)
+ return "[" + prefix + "] ";
+ std::string spaces((min_prefix - len_prefix) / 2, ' ');
+ return (len_prefix % 2 ? "[ " : "[") + spaces + prefix + spaces + "] ";
+}
+
} // namespace logging
} // namespace util
} // namespace onert
#define VERBOSE(name) \
if (::onert::util::logging::ctx.enabled()) \
- std::cout << "[" << #name << "] "
+ std::cout << ::onert::util::logging::decorated_name(#name)
#define VERBOSE_F() \
if (::onert::util::logging::ctx.enabled()) \
- std::cout << "[" << __func__ << "] "
+ std::cout << ::onert::util::logging::decorated_name(__func__)
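A quick sketch of how the decorated prefix shows up in practice; output is emitted only when verbose logging is enabled in the runtime configuration, and the tag names are arbitrary.

#include <iostream>
#include <string>
#include "util/logging.h"

void logging_sketch()
{
  VERBOSE(ALLOC) << "short tags are centered inside a fixed-width bracket" << std::endl;
  VERBOSE(StaticTensorManager) << "long tags are printed as-is in brackets" << std::endl;

  // decorated_name() can also be called directly, e.g. for custom sinks
  std::cout << onert::util::logging::decorated_name("GRAPH") << "done" << std::endl;
}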
#define WHEN_LOG_ENABLED(METHOD) \
if (::onert::util::logging::ctx.enabled()) \
namespace backend
{
-void BackendContext::initialize(const std::vector<OperationInfo> &operation_list,
- const std::vector<ir::OperandIndex> &operand_list)
-{
- _operation_list = operation_list;
- _operand_list = operand_list;
-}
-
} // namespace backend
} // namespace onert
namespace backend
{
-ir::Shape ITensor::getShape() const
-{
- onert::ir::Shape shape(num_dimensions());
- for (uint32_t d = 0; d < num_dimensions(); d++)
- shape.dim(d) = dimension(d);
-
- return shape;
-}
+// `dynamic_cast` does not work across shared library boundaries on NDK.
+// Defining the destructor here makes it a key function, so `dynamic_cast` works across
+// dynamically loaded libraries.
+ITensor::~ITensor() {}
} // namespace backend
} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/Allocator.h"
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+Allocator::Allocator(uint32_t capacity)
+{
+ _base = std::make_unique<uint8_t[]>(capacity);
+
+ VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl;
+ VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl;
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/BackendContextHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/DynamicTensorManager.h"
+
+#include "util/logging.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> ®)
+ : _dynamic_mem_mgr{new DynamicMemoryManager()}, _tensors{reg}
+{
+ // DO NOTHING
+}
+
+void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
+ const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout)
+{
+ assert(_tensors->getNativeTensor(ind) == nullptr);
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr.get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
+}
+
+const ITensor *DynamicTensorManager::getRawITensor(ir::OperandIndex ind)
+{
+ auto ptr = _tensors->getITensor(ind);
+ assert(ptr);
+ return ptr;
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <backend/basic/MemoryManager.h>
+
+#include <cassert>
+
+#include "MemoryPlannerFactory.h"
+#include "util/ConfigSource.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
+{
+ // DO NOTHING
+}
+
+MemoryManager::MemoryManager(const std::string planner_id)
+ : _mem_planner{createMemoryPlanner(planner_id)}
+{
+ // DO NOTHING
+}
+
+basic::IMemoryPlanner *MemoryManager::createMemoryPlanner()
+{
+ auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
+ return basic::MemoryPlannerFactory::get().create(planner_id);
+}
+
+basic::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
+{
+ return basic::MemoryPlannerFactory::get().create(planner_id);
+}
+
+void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+ _mem_planner->claim(ind, size);
+}
+
+void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); }
+
+void MemoryManager::allocate(void)
+{
+ _mem_alloc = std::make_shared<basic::Allocator>(_mem_planner->capacity());
+ assert(_mem_alloc->base());
+}
+
+uint8_t *MemoryManager::getBuffer(const ir::OperandIndex &ind) const
+{
+ assert(_mem_planner->memory_plans().find(ind) != _mem_planner->memory_plans().end());
+ const auto &mem_blk = _mem_planner->memory_plans().at(ind);
+ return _mem_alloc->base() + mem_blk.offset;
+}
+
+std::shared_ptr<basic::Allocator> DynamicMemoryManager::allocate(const ITensor *tensor,
+ uint32_t capacity)
+{
+ auto find = _mem_alloc_map.find(tensor);
+ if (find != _mem_alloc_map.end())
+ throw std::runtime_error("Cannot allocate memory for a tensor. It was already allocated.");
+
+ _mem_alloc_map[tensor] = std::make_shared<basic::Allocator>(capacity);
+ return _mem_alloc_map[tensor];
+}
+
+void DynamicMemoryManager::deallocate(const ITensor *tensor)
+{
+ auto find = _mem_alloc_map.find(tensor);
+ if (find == _mem_alloc_map.end())
+ throw std::runtime_error("Cannot find Allocator for the requested index");
+
+ find->second->release(); // explicitly erase memory
+ _mem_alloc_map.erase(find); // remove tensor and alloc
+}
+
+void DynamicMemoryManager::deallocate(void)
+{
+ for (auto &mem_alloc : _mem_alloc_map)
+ {
+ // Release memory buffer of mem_alloc
+ mem_alloc.second->release();
+ }
+
+ _mem_alloc_map.clear();
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
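A rough usage sketch of the static MemoryManager above: record all claims and releases first, then allocate once and resolve per-operand buffers. The operand indices and sizes are made up; "FirstFit" is one of the keys handled by MemoryPlannerFactory.

#include <cstdint>
#include <backend/basic/MemoryManager.h>
#include "ir/Index.h"

void memory_manager_sketch()
{
  using onert::ir::OperandIndex;

  onert::backend::basic::MemoryManager mgr{"FirstFit"};

  // Planning phase: operands with overlapping lifetimes get disjoint offsets
  mgr.claimPlan(OperandIndex{0}, 64);
  mgr.claimPlan(OperandIndex{1}, 128);
  mgr.releasePlan(OperandIndex{0});
  mgr.claimPlan(OperandIndex{2}, 32); // may reuse operand 0's region

  // Allocation phase: one backing buffer, per-operand offsets
  mgr.allocate();
  uint8_t *buf = mgr.getBuffer(OperandIndex{1});
  (void)buf;
}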
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryPlanner.h"
+#include "util/logging.h"
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+ Block blk{_capacity, size};
+ _mem_plans[ind] = blk;
+ _capacity += size;
+
+ VERBOSE(BP_PLANNER) << "CLAIM(" << ind << "): " << blk.offset << ", " << blk.size << std::endl;
+}
+
+void BumpPlanner::release(const ir::OperandIndex &ind)
+{
+ VERBOSE(BP_PLANNER) << "RELEASE(" << ind << "): "
+ << "NOTHING does" << std::endl;
+}
+
+// There are some assumptions for claiming memory(== making a reservation for memory).
+// 1. About _claim_table(std::map).
+// - The table's data structure is std::map so that it always sorts
+// value(OperandIndex) by key(base_offset).
+// - This claim() inserts key/value into _claim_table and the release() removes the key/value from
+// _claim_table.
+// - _claim_table shows the memory status at a certain point in time. Therefore,
+// - If _claim_table has an offset and a certain size at a certain point in time,
+// it means the place at the offset has been already claimed(== can't claim now. need to find
+// someplace new).
+// - If _claim_table doesn't have any element for an offset and a certain size at a certain
+// point in time, it means the place at the offset can be claimed.
+// 2. In the loop for _claim_table, we can assume the current claim_base_offset value is bigger than
+// the previous claim_base_offset.
+void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+ // Find the right position for claiming
+ uint32_t next_offset = 0;
+ for (auto &mem_claim : _claim_table)
+ {
+ auto claimed_base_offset = mem_claim.first;
+ auto claimed_size = _mem_plans[mem_claim.second].size;
+ if (next_offset + size <= claimed_base_offset)
+ {
+ break;
+ }
+ else
+ {
+ next_offset = claimed_base_offset + claimed_size;
+ }
+ }
+
+ // Now next_offset is set to the proper offset
+ _claim_table[next_offset] = ind;
+ _mem_plans[ind] = {next_offset, size};
+
+ VERBOSE(FF_PLANNER) << "claim(" << ind << "): [+" << next_offset << ", " << size << "sz]"
+ << std::endl;
+
+ if (_capacity < next_offset + size)
+ {
+ _capacity = next_offset + size;
+ }
+}
+
+void FirstFitPlanner::release(const ir::OperandIndex &ind)
+{
+ for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
+ {
+ if (it->second == ind)
+ {
+ uint32_t offset = it->first;
+ uint32_t index = ind.value();
+ uint32_t size = _mem_plans[ind].size;
+
+ _claim_table.erase(it);
+
+ VERBOSE(FF_PLANNER) << "release(" << index << "): [+" << offset << ", " << size << "sz]"
+ << std::endl;
+ return;
+ }
+ }
+ assert(!"Cannot release for given index. It has been not claimed or released already.");
+}
+
+WICPlanner::WICPlanner()
+ : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
+ _operands()
+{
+ // DO NOTHING
+}
+
+void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+ _operands.emplace(size, ind);
+ _interference_graph[ind].insert(_interference_graph[ind].end(), _live_operands.cbegin(),
+ _live_operands.cend());
+ for (const auto &live_operand : _live_operands)
+ {
+ _interference_graph[live_operand].emplace_back(ind);
+ }
+ _live_operands.emplace(ind);
+
+ VERBOSE(WIC_PLANNER) << "claim(" << ind << "): [" << size << "sz]" << std::endl;
+}
+
+void WICPlanner::release(const ir::OperandIndex &ind)
+{
+ _live_operands.erase(ind);
+ VERBOSE(WIC_PLANNER) << "release(" << ind << ")" << std::endl;
+}
+
+/*
+ * Build memory plans using liveness and size of operands
+ * 1. Build interference graph at claim
+ * - Two operands interfere if they have overlapped live range
+ * 2. Sort operands in descending order of size
+ * - Use std::multimap to sort operands
+ * 3. Allocate memory block for sorted operands
+ * - Find free memory block which does not overlap with interfered operands
+ */
+void WICPlanner::buildMemoryPlans()
+{
+ for (const auto &operand : _operands)
+ {
+ uint32_t size = operand.first;
+ const ir::OperandIndex &ind = operand.second;
+ VERBOSE(WIC_PLANNER) << "build_plan(" << ind << "): [" << size << "sz]" << std::endl;
+
+ uint32_t next_offset = 0;
+ if (_interference_graph.count(ind))
+ {
+ // Find interfered memory plans and sort them by offset
+ std::multimap<uint32_t, uint32_t> interfered_plans;
+ for (const auto &interference : _interference_graph[ind])
+ {
+ if (_mem_plans.count(interference))
+ interfered_plans.emplace(_mem_plans[interference].offset, _mem_plans[interference].size);
+ }
+
+ // Find free memory block in first-fit manner
+ for (const auto &interfered_plan : interfered_plans)
+ {
+ auto claimed_base_offset = interfered_plan.first;
+ auto claimed_size = interfered_plan.second;
+ VERBOSE(WIC_PLANNER) << "interfere : [+" << claimed_base_offset << ", " << claimed_size
+ << "sz]" << std::endl;
+ if (next_offset + size <= claimed_base_offset)
+ {
+ break;
+ }
+ else if (next_offset < claimed_base_offset + claimed_size)
+ {
+ next_offset = claimed_base_offset + claimed_size;
+ }
+ }
+ }
+ else
+ {
+ VERBOSE(WIC_PLANNER) << "No interference" << std::endl;
+ }
+
+ _mem_plans[ind] = {next_offset, size};
+ VERBOSE(WIC_PLANNER) << "alloc(" << ind << "): [+" << next_offset << ", " << size << "sz]"
+ << std::endl;
+
+ if (_capacity < next_offset + size)
+ {
+ _capacity = next_offset + size;
+ }
+ }
+ _initialized = true;
+ _interference_graph.clear();
+ _operands.clear();
+}
+
+WICPlanner::MemoryPlans &WICPlanner::memory_plans()
+{
+ if (!_initialized)
+ buildMemoryPlans();
+ return _mem_plans;
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file       MemoryPlanner.h
+ * @brief      This file contains Memory Planning related classes
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
+#define __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
+
+#include <map>
+#include <vector>
+#include <unordered_set>
+#include <memory>
+
+#include "backend/basic/Allocator.h"
+#include "backend/basic/IMemoryPlanner.h"
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+/**
+ * @brief Class to plan memory by bump way
+ */
+class BumpPlanner : public IMemoryPlanner
+{
+public:
+ /**
+ * @brief Claim memory for operand by bump way
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ void claim(const ir::OperandIndex &, size_t) override;
+ /**
+ * @brief Release memory for operand by bump way
+ * @param[in] index The operand index
+ */
+ void release(const ir::OperandIndex &) override;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ uint32_t capacity() override { return _capacity; }
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ MemoryPlans &memory_plans() override { return _mem_plans; }
+
+private:
+ uint32_t _capacity = 0;
+ MemoryPlans _mem_plans;
+};
+
+/**
+ * @brief Class to plan memory by firstfit way
+ */
+class FirstFitPlanner : public IMemoryPlanner
+{
+public:
+ /**
+ * @brief Claim memory for operand by firstfit way
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ void claim(const ir::OperandIndex &, size_t) override;
+ /**
+ * @brief Release memory for operand by firstfit way
+ * @param[in] index The operand index
+ */
+ void release(const ir::OperandIndex &) override;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ uint32_t capacity() override { return _capacity; }
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ MemoryPlans &memory_plans() override { return _mem_plans; }
+
+private:
+ uint32_t _capacity = 0;
+ MemoryPlans _mem_plans;
+ // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
+ std::map<uint32_t, ir::OperandIndex> _claim_table;
+};
+
+/**
+ * @brief Class to plan memory by Weighted Interval Color algorithm
+ */
+class WICPlanner : public IMemoryPlanner
+{
+public:
+ WICPlanner();
+
+ /**
+ * @brief Claim memory for operand by WIC algorithm
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ void claim(const ir::OperandIndex &, size_t) override;
+ /**
+ * @brief Release memory for operand by WIC algorithm
+ * @param[in] index The operand index
+ */
+ void release(const ir::OperandIndex &) override;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ uint32_t capacity() override
+ {
+ if (!_initialized)
+ buildMemoryPlans();
+ return _capacity;
+ }
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ MemoryPlans &memory_plans() override;
+
+private:
+ void buildMemoryPlans();
+
+ bool _initialized;
+ uint32_t _capacity;
+ MemoryPlans _mem_plans;
+ std::unordered_set<ir::OperandIndex> _live_operands;
+ ir::OperandIndexMap<std::vector<ir::OperandIndex>> _interference_graph;
+ // Sort operands by descending order of size
+ std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _operands;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "MemoryPlanner.h"
+#include "ir/Index.h"
+
+TEST(Allocator, allocate_test)
+{
+ ::onert::backend::basic::Allocator allocator(1024);
+ ASSERT_NE(allocator.base(), nullptr);
+}
+
+TEST(BumpPlanner, claim_test)
+{
+ ::onert::backend::basic::BumpPlanner planner;
+
+ auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.claim(mem_idx, size);
+ auto mem_blk = planner.memory_plans()[mem_idx];
+ ASSERT_EQ(mem_blk.offset, expected_offset);
+ ASSERT_EQ(mem_blk.size, size);
+ };
+
+ claim(0, 10, 0);
+ claim(1, 20, 10);
+ claim(2, 30, 30);
+}
+
+TEST(FirstFitPlanner, claim_release_test)
+{
+ ::onert::backend::basic::FirstFitPlanner planner;
+
+ auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.claim(mem_idx, size);
+ auto mem_blk = planner.memory_plans()[mem_idx];
+ ASSERT_EQ(mem_blk.offset, expected_offset);
+ ASSERT_EQ(mem_blk.size, size);
+ };
+
+ auto release = [&planner](uint32_t index) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.release(mem_idx);
+ };
+
+ // 0 CLAIM - 10
+ claim(0, 10, 0);
+
+ // 1 CLAIM - 20
+ claim(1, 20, 10);
+
+ // 2 CLAIM - 30
+ claim(2, 30, 30);
+
+ // 0 RELEASE - 10
+ release(0);
+
+ // 3 CLAIM - 20
+ claim(3, 20, 60);
+
+ // 4 CLAIM - 5
+ claim(4, 5, 0);
+
+ // 5 CLAIM - 10
+ claim(5, 10, 80);
+
+ // 6 CLAIM - 5
+ claim(6, 5, 5);
+
+ // 2 RELEASE - 30
+ release(2);
+
+ // 7 CLAIM - 35
+ claim(7, 35, 90);
+
+ // 8 CLAIM - 10
+ claim(8, 10, 30);
+
+ // 4 RELEASE - 5
+ release(4);
+
+ // 9 CLAIM - 10
+ claim(9, 10, 40);
+
+ // 10 CLAIM - 10
+ claim(10, 10, 50);
+
+ // 6 RELEASE
+ release(6);
+
+ // 1 RELEASE
+ release(1);
+
+ // 8 RELEASE
+ release(8);
+
+ // 9 RELEASE
+ release(9);
+
+ // 10 RELEASE
+ release(10);
+
+ // 3 RELEASE
+ release(3);
+
+ // 5 RELEASE
+ release(5);
+
+ // 7 RELEASE
+ release(7);
+}
+
+TEST(WICPlanner, claim_release_test)
+{
+ ::onert::backend::basic::WICPlanner planner;
+
+ auto claim = [&planner](uint32_t index, size_t size) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.claim(mem_idx, size);
+ };
+
+ auto release = [&planner](uint32_t index) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.release(mem_idx);
+ };
+
+ auto verify = [&planner](uint32_t index, uint32_t size, uint32_t expected_offset) {
+ onert::ir::OperandIndex mem_idx(index);
+ auto mem_blk = planner.memory_plans()[mem_idx];
+ ASSERT_EQ(mem_blk.offset, expected_offset);
+ ASSERT_EQ(mem_blk.size, size);
+ };
+
+ auto capacity = [&planner](uint32_t expected_capacity) {
+ auto actual_capacity = planner.capacity();
+ ASSERT_EQ(actual_capacity, expected_capacity);
+ };
+
+ claim(0, 20);
+ claim(1, 5);
+ release(0);
+ claim(2, 10);
+ release(1);
+ claim(3, 10);
+ release(2);
+ claim(4, 10);
+ release(3);
+ claim(5, 20);
+ release(4);
+ claim(6, 20);
+ release(5);
+ release(6);
+
+ // VERIFY 0 - 0
+ verify(0, 20, 0);
+
+ // VERIFY 1 - 20
+ verify(1, 5, 20);
+
+ // VERIFY 2 - 0
+ verify(2, 10, 0);
+
+ // VERIFY 3 - 10
+ verify(3, 10, 10);
+
+ // VERIFY 4 - 20
+ verify(4, 10, 20);
+
+ // VERIFY 5 - 0
+ verify(5, 20, 0);
+
+ // VERIFY 6 - 20
+ verify(6, 20, 20);
+
+ // CAPACITY - 40
+ capacity(40);
+}
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryPlannerFactory.h"
+
+#include "MemoryPlanner.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+MemoryPlannerFactory &MemoryPlannerFactory::get()
+{
+ static MemoryPlannerFactory instance;
+ return instance;
+}
+
+IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key)
+{
+ if (key == "FirstFit")
+ {
+ return new FirstFitPlanner;
+ }
+ else if (key == "Bump")
+ {
+ return new BumpPlanner;
+ }
+ else if (key == "WIC")
+ {
+ return new WICPlanner;
+ }
+ return new FirstFitPlanner; // Default Planner
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
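A short sketch of the dispatch above: the known keys are "FirstFit", "Bump", and "WIC", anything else falls back to FirstFitPlanner, and the caller owns the returned planner. Wrapping it in a unique_ptr assumes IMemoryPlanner declares a virtual destructor, as onert interfaces usually do.

#include <memory>
#include "MemoryPlannerFactory.h" // internal header of the basic backend sources

void planner_factory_sketch()
{
  using onert::backend::basic::IMemoryPlanner;
  using onert::backend::basic::MemoryPlannerFactory;

  std::unique_ptr<IMemoryPlanner> planner{MemoryPlannerFactory::get().create("WIC")};
  std::unique_ptr<IMemoryPlanner> fallback{MemoryPlannerFactory::get().create("DoesNotExist")};
  (void)planner;
  (void)fallback;
}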
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
+#define __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
+
+#include "backend/basic/IMemoryPlanner.h"
+
+#include <string>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class MemoryPlannerFactory
+{
+public:
+ static MemoryPlannerFactory &get();
+
+private:
+ MemoryPlannerFactory() = default;
+
+public:
+ IMemoryPlanner *create(const std::string &key);
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/StaticTensorManager.h"
+
+#include "backend/basic/DynamicTensorManager.h"
+#include "backend/basic/Tensor.h"
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®,
+ DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new MemoryManager()}, _tensors{reg}, _dynamic_tensor_manager{
+ dynamic_tensor_manager}
+{
+ // DO NOTHING
+}
+
+void StaticTensorManager::allocateNonconsts(void)
+{
+ _nonconst_mgr->allocate();
+
+ for (auto &pair : _tensors->native_tensors())
+ {
+ const auto &ind = pair.first;
+ auto tensor = pair.second.get();
+ if (!_as_constants[ind] && !tensor->is_dynamic())
+ {
+ auto *buffer = _nonconst_mgr->getBuffer(ind);
+ tensor->setBuffer(buffer);
+
+ VERBOSE(CPU_StaticTensorManager)
+ << "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
+ }
+ }
+}
+
+void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
+ const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
+ bool as_const)
+{
+ assert(!_tensors->getNativeTensor(ind));
+ if (as_const)
+ {
+ auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
+ else
+ {
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
+ _dynamic_tensor_manager->dynamic_mem_mgr().get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
+ _as_constants[ind] = as_const;
+}
+
+void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+ assert(_tensors->getNativeTensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getNativeTensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->claimPlan(ind, size);
+}
+
+void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
+{
+ assert(_tensors->getNativeTensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getNativeTensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->releasePlan(ind);
+}
+
+void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+ for (const auto &it : _tensors->native_tensors())
+ fn(it.first);
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
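A sketch of the static tensor lifecycle this manager drives: build, record claim/release plans, then allocate. The TensorRegistry header path and Shape's initializer-list constructor are assumed from their uses elsewhere in this change.

#include <memory>
#include "backend/basic/DynamicTensorManager.h"
#include "backend/basic/StaticTensorManager.h"
#include "backend/basic/TensorRegistry.h"

void static_tensor_manager_sketch()
{
  using namespace onert;

  auto reg = std::make_shared<backend::basic::TensorRegistry>();
  backend::basic::DynamicTensorManager dyn_mgr{reg};
  backend::basic::StaticTensorManager static_mgr{reg, &dyn_mgr};

  ir::OperandIndex ind{0};
  ir::OperandInfo info{ir::Shape{1, 4}, ir::TypeInfo{ir::DataType::FLOAT32},
                       ir::MemAllocType::STATIC};
  static_mgr.buildTensor(ind, info, ir::Layout::NHWC, /*as_const=*/false);

  // Plans must be recorded before allocateNonconsts() assigns the real buffers
  static_mgr.claimPlan(ind, info.total_size());
  static_mgr.releasePlan(ind);
  static_mgr.allocateNonconsts();
}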
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/Tensor.h"
+
+#include "ir/DataType.h"
+#include "backend/basic/MemoryManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+Tensor::~Tensor() {}
+
+size_t Tensor::calcOffset(const ir::Coordinates &coords) const
+{
+ auto shape = getShape();
+ size_t rank = shape.rank();
+ rank = rank == 0 ? 1 : rank;
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ auto dim = shape.rank() == 0 ? 1 : shape.dim(i);
+ offset = offset * dim + coords[i];
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+}
+
+void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); }
+
+bool Tensor::applyShape(const ir::Shape &new_shape)
+{
+ bool previously_dynamic = is_dynamic();
+
+ auto allocTensorMem = [&]() {
+ auto capacity = total_size();
+ auto alloc = _dynamic_mem_mgr->allocate(this, capacity);
+ setBuffer(alloc);
+ };
+
+ if (!previously_dynamic || buffer() == nullptr)
+ {
+ // Always set shape - when buffer with same size was already allocated, shape could differ
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem();
+ }
+ else
+ {
+ auto previous_size = total_size();
+ auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
+ if (previous_size != new_size)
+ {
+ _dynamic_mem_mgr->deallocate(this);
+
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem();
+ }
+ else
+ { // when buffer with same size was already allocated, shape could differ
+ setShape(new_shape);
+ }
+ }
+ return true;
+}
+
+ir::Shape Tensor::getShape() const { return _info.shape(); }
+
+void Tensor::deallocBuffer()
+{
+ if (_allocator)
+ {
+ _buffer = nullptr;
+ _allocator.reset();
+ if (_dynamic_mem_mgr)
+ {
+ _dynamic_mem_mgr->deallocate(this);
+ }
+ }
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+// ExternalTensor
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+// `dynamic_cast` does not work across shared library boundaries on NDK.
+// Defining the destructor here makes it a key function, so `dynamic_cast` works across
+// dynamically loaded libraries.
+ExternalTensor::~ExternalTensor() {}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
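A sketch of dynamic reshaping with the Tensor above: applyShape() allocates through the DynamicMemoryManager on first use and reallocates only when the byte size changes. The DYNAMIC MemAllocType enumerator and Shape's initializer-list constructor are assumptions.

#include "backend/basic/MemoryManager.h"
#include "backend/basic/Tensor.h"

void dynamic_reshape_sketch()
{
  using namespace onert;

  backend::basic::DynamicMemoryManager dyn_mem_mgr;
  ir::OperandInfo info{ir::Shape{1, 4}, ir::TypeInfo{ir::DataType::FLOAT32},
                       ir::MemAllocType::DYNAMIC};
  backend::basic::Tensor tensor{info, ir::Layout::NHWC, &dyn_mem_mgr};

  tensor.applyShape(ir::Shape{2, 4}); // first use: allocates a 32-byte buffer
  tensor.applyShape(ir::Shape{4, 2}); // same byte size: shape updated, buffer kept
}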
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <backend/basic/TensorBuilder.h>
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+ // The CPU backend supports only the NHWC layout
+ assert(layout == ir::Layout::NHWC);
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ const auto tensor_info = _tensor_info_map.at(ind);
+
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_BACKEND_H__
+#define __ONERT_BACKEND_BUILTIN_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "KernelGenerator.h"
+#include "TensorBuilder.h"
+#include "Tensor.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
+ {
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ // The ControlFlow backend may not build tensors for itself because its operations use
+ // tensors of other backends instead.
+ // But the backend still builds tensors in case the controlflow operation has a constant
+ // input or consecutive controlflow operations exist. We have to make sure they are not built
+ // later
+ // 1. Constant input
+ // These tensors cannot be dynamic tensors, so let's handle them as follows:
+ // - always skip copying
+ // - if it is operation's input in child subgraph: register "use" as constant input of the
+ // operations in child subgraph
+ // - if it is child subgraph's output: register "use" as constant input of the operations
+ // using it
+ // 2. Intermediate tensors of consecutive controlflow operations
+ // These tensors can be dynamic and are complicated to support without copying. But
+ // there has been no such case so far, so let's support it later
+ // TODO Remove TensorBuilder and ConstantInitializer
+ // TODO Support intermediate tensors of consecutive controlflow operations
+ auto tr = std::make_shared<TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->kernel_gen = std::make_shared<KernelGenerator>(
+ *context->graph(), tb->dynamicTensorManager(), tr, context->external_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_BACKEND_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "KernelGenerator.h"
+#include "backend/basic/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
+
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap ret;
+
+ for (auto op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ basic::initConsts(*this);
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph *>(graph())->operands().iterate(
+ [&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen},
+ _external_context(std::make_shared<ExternalContext>())
+ {
+ }
+
+ ITensorRegistry *genTensors() override;
+
+ FunctionMap genKernels() override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void planTensors(const std::vector<onert::ir::OperationIndex> &order,
+ const compiler::GraphLowerInfo &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ // NOTE A ruy context owns a thread pool, so creating multiple ruy contexts also creates
+ // duplicate thread pools
+ // TODO Create one ruy context per session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+std::string Config::ID = "builtin";
+
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout frontend_layout)
+{
+ return frontend_layout;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_CONFIG_H__
+#define __ONERT_BACKEND_BUILTIN_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class Config : public IConfig
+{
+public:
+ static std::string ID;
+ std::string id() override { return ID; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return false; }
+ bool supportDynamicTensor() override
+ {
+ // TODO Make this backend to support dynamic tensor or not to build non-constant tensor
+ return true;
+ }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_CONFIG_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
+
+#include <backend/basic/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+using ConstantInitializer = basic::ConstantInitializer;
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
+
+#include "TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/basic/DynamicTensorManager.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+using DynamicTensorManager = basic::DynamicTensorManager;
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
+
+#include <util/ConfigSource.h>
+
+#include <ruy/context.h>
+#include <ruy/context_get_ctx.h>
+#include <ruy/ctx.h>
+#include <ruy/tune.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+// TODO Unify this with cpu::ExternalContext
+class ExternalContext
+{
+private:
+ static const int kDefaultNumThreadpoolThreads = 1;
+
+public:
+ ExternalContext() : _ruy_context(std::make_unique<ruy::Context>())
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+ initPerThreadState();
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->set_max_num_threads(target_num_threads);
+ }
+
+ ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ void initPerThreadState()
+ {
+ // Initialize per-thread state.
+ const int thread_count = _ruy_context->max_num_threads();
+ auto ctx = ruy::get_ctx(_ruy_context.get());
+ ctx->EnsureThreadSpecificResources(thread_count);
+ for (int i = 0; i < thread_count; i++)
+ {
+ ctx->GetThreadSpecificTuningResolver(i)->SetTuning(ctx->explicit_tuning());
+ }
+ }
+
+private:
+ const std::unique_ptr<ruy::Context> _ruy_context;
+};
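+
+ // Usage sketch: the context is meant to be created once and shared so that only a single ruy
+ // thread pool exists. The thread count below is an arbitrary example value, and <memory> is
+ // assumed to be available through the includes above (the class itself already relies on it).
+ inline std::shared_ptr<ExternalContext> createSharedExternalContext()
+ {
+   auto context = std::make_shared<ExternalContext>();
+   // Override the RUY_THREADS configuration value that the constructor already applied
+   context->setMaxNumThreads(2);
+   return context;
+ }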
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IOTensor.h"
+
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+// `dynamic_cast` does not work across library boundaries on NDK
+// With this as a key function, `dynamic_cast` works across dynamically loaded libraries
+IOTensor::~IOTensor() {}
+
+IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout)
+ : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout}
+{
+ setUserTensor(nullptr, 0);
+}
+
+void IOTensor::setTensor(IPortableTensor *tensor)
+{
+ assert(tensor);
+ assert(tensor != this);
+ // TODO Handle when layout was changed
+ assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet
+ _user_tensor.reset();
+ _tensor = tensor;
+}
+
+void IOTensor::setUserTensor(uint8_t *buffer, size_t size)
+{
+ _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size);
+ _tensor = _user_tensor.get();
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
+
+#include "backend/IPortableTensor.h"
+#include "UserTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor object that indirects to the tensor it is pointing to.
+ *
+ * A model I/O tensor could be two types.
+ *
+ * 1. @c UserTensor, if it is the primary graph
+ * 2. Any other derivative of @c IPortableTensor from another backend, otherwise
+ *
+ * To support these, this object indirects everything to the actual tensor pointer.
+ * Exceptionally if it is UserTensor, this class creates and manages it.
+ */
+class IOTensor : public IPortableTensor
+{
+public:
+ IOTensor(const ir::OperandInfo &info, ir::Layout layout);
+ ~IOTensor();
+
+public:
+ void setTensor(IPortableTensor *tensor);
+ void setUserTensor(uint8_t *buffer, size_t size);
+ ir::OperandInfo orig_info() const { return _orig_info; }
+ ir::Layout orig_layout() const { return _orig_layout; }
+
+public:
+ uint8_t *buffer() const override { return _tensor->buffer(); }
+ size_t total_size() const override { return _tensor->total_size(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override
+ {
+ return _tensor->calcOffset(coords);
+ }
+ ir::Layout layout() const override { return _tensor->layout(); }
+ ir::DataType data_type() const override { return _tensor->data_type(); }
+ bool is_dynamic() const override
+ {
+ return _is_dynamic || _orig_info.isDynamic() || (_tensor && _tensor->is_dynamic());
+ }
+ void set_dynamic() override { _is_dynamic = true; }
+ ir::Shape getShape() const override { return _tensor->getShape(); }
+ void setShape(const ir::Shape &shape) override
+ {
+ // Workaround because IPortableTensor holds _info as its member
+ _info.shape(shape);
+ _tensor->setShape(shape);
+ }
+ bool is_constant() const override { return _tensor->is_constant(); }
+ bool applyShape(const ir::Shape &shape) override
+ {
+ // Workaround because IPortableTensor holds _info as its member
+ _info.shape(shape);
+ return _tensor->applyShape(shape);
+ }
+
+public:
+ void setShapeOfIPortableTensor(const ir::Shape &shape) { _info.shape(shape); }
+
+private:
+ const ir::OperandInfo _orig_info;
+ const ir::Layout _orig_layout;
+ bool _is_dynamic{false};
+ IPortableTensor *_tensor{nullptr}; //< The actual tensor that is indirected
+ std::unique_ptr<UserTensor> _user_tensor; //< If it is a user tensor, it is managed by this object
+};
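+
+ // Usage sketch: the two ways an IOTensor is typically bound before execution. The arguments are
+ // assumed to be provided by the caller; the helper below is illustrative only.
+ inline void bindIOTensor(IOTensor &io, IPortableTensor *backend_tensor, uint8_t *user_buffer,
+                          size_t user_size)
+ {
+   if (backend_tensor != nullptr)
+   {
+     // Indirect to a tensor owned by another backend (its layout must match the original layout)
+     io.setTensor(backend_tensor);
+   }
+   else
+   {
+     // Primary graph I/O: wrap the caller-owned buffer in a UserTensor managed by the IOTensor
+     io.setUserTensor(user_buffer, user_size);
+   }
+   // From here on, accessors such as buffer() and getShape() forward to the bound tensor
+ }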
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include <backend/BackendContext.h>
+#include <util/Utils.h>
+#include "kernel/IfLayer.h"
+#include "kernel/WhileLayer.h"
+#include "kernel/PermuteLayer.h"
+#include "exec/ExecutorBase.h"
+#include "exec/FunctionSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager},
+ _tensor_reg{tensor_reg}, _tensor_registries{}, _executor_map{nullptr}, _external_context{
+ external_context}
+{
+ UNUSED_RELEASE(_graph);
+ UNUSED_RELEASE(_tensor_registries);
+ UNUSED_RELEASE(_executor_map);
+}
+
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ assert(_dyn_tensor_manager);
+ assert(_tensor_reg);
+
+ auto dyn_shape_inferer =
+ std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
+
+ auto ret = std::make_unique<exec::FunctionSequence>();
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op_ind = ind;
+ dyn_ctx->operations = &_graph.operations();
+ dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
+
+ ret->dynamic_tensor_ctx(dyn_ctx);
+ }
+
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::operation::If &node)
+{
+ const auto then_subg_index = node.param().then_subg_index;
+ const auto else_subg_index = node.param().else_subg_index;
+
+ std::vector<backend::IPortableTensor *> input_tensors;
+ for (const auto input_index : node.getInputs())
+ {
+ auto input_tensor = getPortableTensor(input_index);
+ input_tensors.emplace_back(input_tensor);
+ }
+
+ std::vector<backend::IPortableTensor *> output_tensors;
+ for (const auto output_index : node.getOutputs())
+ {
+ auto output_tensor = getPortableTensor(output_index);
+ output_tensors.emplace_back(output_tensor);
+ }
+
+ // IfLayer just takes the ExecutorMap instead of the then and else executors to avoid the
+ // complexity of creating executors recursively
+ const auto cond_tensor = input_tensors.front();
+ input_tensors.erase(input_tensors.begin());
+ auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>(
+ cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
+ _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Permute &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ // Add PermuteLayer
+ std::vector<ITensor *> output_tensors{getTensor(output_index)};
+ std::vector<ITensor *> input_tensors{getTensor(input_index)};
+
+ auto fn =
+ std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, _external_context);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::While &node)
+{
+ const auto cond_subg_index = node.param().cond_subg_index;
+ const auto body_subg_index = node.param().body_subg_index;
+
+ // This op does not support a constant as input, because the builtin backend does not have a
+ // TensorBuilder
+ std::vector<backend::IPortableTensor *> input_tensors;
+ for (const auto input_index : node.getInputs())
+ {
+ auto input_tensor = getPortableTensor(input_index);
+ input_tensors.emplace_back(input_tensor);
+ }
+
+ std::vector<backend::IPortableTensor *> output_tensors;
+ for (const auto output_index : node.getOutputs())
+ {
+ auto output_tensor = getPortableTensor(output_index);
+ output_tensors.emplace_back(output_tensor);
+ }
+
+ // WhileLayer just takes the ExecutorMap instead of the cond and body executors to avoid the
+ // complexity of creating executors recursively
+ auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>(
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
+ _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
+{
+ // get Tensor from all tensor registries (for Permute op)
+ auto ret = _tensor_registries.getITensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index)
+{
+ auto ret = _tensor_reg->getPortableTensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
+
+#include "exec/IExecutor.h"
+#include "ExternalContext.h"
+#include "ir/Graph.h"
+#include "TensorBuilder.h"
+#include "compiler/TensorRegistries.h"
+#include "backend/basic/KernelGeneratorBase.h"
+#include "TensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class KernelGenerator : public basic::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
+ {
+ _tensor_registries = tensor_registries;
+ }
+ void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ {
+ // FIXME Using shared_ptr's raw pointer!
+ _executor_map = executor_map.get();
+ }
+
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
+ void visit(const ir::operation::If &) override;
+ void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::While &) override;
+
+private:
+ backend::ITensor *getTensor(const ir::OperandIndex &index);
+ backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index);
+
+private:
+ DynamicTensorManager *_dyn_tensor_manager;
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ compiler::TensorRegistries _tensor_registries;
+ exec::ExecutorMap *_executor_map;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_H__
+
+#include <backend/basic/Tensor.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TENSOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
+ _static_tensor_mgr{
+ new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+ VERBOSE_F() << "cpucommon REGISTER!! " << ind << std::endl;
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, backend_layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, backend_layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
+ const auto tensor_info = _tensor_info_map.at(ind);
+
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ // User tensors are not registered in _tensor_info_map, but objects for them do exist
+ // in the tensor registry.
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_reg->getITensor(ind))
+ return true;
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+DynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
+{
+ return _dynamic_tensor_mgr.get();
+}
+
+basic::Tensor *TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
+{
+ return _tensor_reg->getNativeOwnTensor(ind);
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
+
+#include <backend/basic/StaticTensorManager.h>
+#include <backend/basic/TensorRegistry.h>
+#include <backend/basic/Tensor.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include <unordered_map>
+
+#include "DynamicTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
+
+ /**
+ * @brief Register tensor information to allocate on CPU backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+ * @param[in] layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void allocate(void);
+
+ DynamicTensorManager *dynamicTensorManager(void);
+
+ /**
+ * @brief Get tensor with a specific OperandIndex.
+ * @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
+ * If not, the program will crash with an assert or an exception.
+ * @return operand::Tensor *
+ */
+ basic::Tensor *nativeOwnTensorAt(const ir::OperandIndex &ind);
+
+private:
+ const std::shared_ptr<TensorRegistry> _tensor_reg;
+ std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<basic::StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
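+
+ // Usage sketch of the static-tensor lifecycle this builder drives. In the real pipeline these
+ // calls are issued according to operand liveness; the helper below is illustrative only and
+ // uses NHWC as an example layout.
+ inline basic::Tensor *buildOneStaticTensor(TensorBuilder &builder, const ir::OperandIndex &ind,
+                                            const ir::OperandInfo &info)
+ {
+   builder.registerTensorInfo(ind, info, ir::Layout::NHWC); // plan a static (or dynamic) tensor
+   builder.notifyFirstUse(ind); // claim space in the memory plan
+   builder.allocate();          // materialize the planned non-constant tensors
+   basic::Tensor *tensor = builder.nativeOwnTensorAt(ind); // has a real buffer if non-constant
+   builder.notifyLastUse(ind); // release the claim once the operand is no longer needed
+   return tensor;
+ }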
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
+
+#include "backend/basic/TensorRegistry.h"
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+#include "IOTensor.h"
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor registry class for builtin backend
+ *
+ * This class contains three types of tensors: two kinds of native tensors (tensors that are
+ * managed by this backend) and migrant tensors.
+ *
+ * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _base_reg )
+ * - NOTE The tensor it actually points to can be from another backend
+ * - NativeOwnTensor - @c basic::Tensor managed by this backend ( in @c _base_reg )
+ * - MigrantTensor - @c IPortableTensor managed by other backends
+ *
+ * @note @c _base_reg is used in implementation to reuse @c basic::StaticTensorManager
+ *
+ */
+class TensorRegistry : public ITensorRegistry
+{
+public:
+ TensorRegistry() : _base_reg{new basic::TensorRegistry} {}
+
+ ITensor *getITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ ITensor *getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getNativeITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getPortableTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ IPortableTensor *getNativeTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getNativeTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ Tensor *getNativeOwnTensor(const ir::OperandIndex &ind)
+ {
+ return _base_reg->getNativeTensor(ind);
+ }
+
+ IOTensor *getNativeIOTensor(const ir::OperandIndex &ind)
+ {
+ auto tensor = _native_io_tensors.find(ind);
+ if (tensor != _native_io_tensors.end())
+ return tensor->second.get();
+ return nullptr;
+ }
+
+ bool setMigrantTensor(const ir::OperandIndex &ind, IPortableTensor *tensor) override
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setMigrantTensor(ind, tensor);
+ return true;
+ }
+
+ void setNativeOwnTensor(ir::OperandIndex ind, std::unique_ptr<Tensor> &&tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setNativeTensor(ind, std::move(tensor));
+ }
+
+ void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _native_io_tensors[ind] = std::move(tensor);
+ }
+
+ const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors()
+ {
+ return _native_io_tensors;
+ }
+ std::shared_ptr<basic::TensorRegistry> base_reg() { return _base_reg; }
+
+private:
+ std::shared_ptr<basic::TensorRegistry> _base_reg;
+ ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors;
+};
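+
+ // Registration sketch for the NativeIOTensor path; the index and tensor are assumed to come from
+ // the caller and the index must not be registered yet. Afterwards the IOTensor is reachable
+ // through every getter that falls back to getNativeIOTensor().
+ inline void registerIOTensorExample(TensorRegistry &reg, const ir::OperandIndex &ind,
+                                     std::unique_ptr<IOTensor> io_tensor)
+ {
+   reg.setNativeIOTensor(ind, std::move(io_tensor));
+   assert(reg.getITensor(ind) == reg.getNativeIOTensor(ind));
+ }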
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UserTensor.h"
+
+#include "util/Exceptions.h"
+#include "ir/DataType.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+size_t UserTensor::calcOffset(const ir::Coordinates &coords) const
+{
+ size_t rank = getShape().rank();
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ offset = offset * getShape().dim(i) + coords[i];
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+}
+
+bool UserTensor::applyShape(const ir::Shape &new_shape)
+{
+ // User tensors cannot be reallocated.
+ auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
+ if (total_size() < new_size)
+ throw InsufficientBufferSizeException{"User given buffer size is too small."};
+ setShape(new_shape);
+ return true;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
+
+#include "ir/OperandInfo.h"
+#include "backend/IPortableTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor object that is for Input and Output tensors from the user.
+ *
+ * This class wraps a buffer that is allocated by the user, so it is responsible for neither
+ * allocation nor deallocation. All the model input/output tensors are wrapped with this class
+ * for execution.
+ *
+ */
+class UserTensor : public IPortableTensor
+{
+public:
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size)
+ : IPortableTensor{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false}
+ {
+ }
+
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout) : UserTensor{info, layout, nullptr, 0}
+ {
+ }
+
+public:
+ void setBuffer(uint8_t *buffer, size_t size)
+ {
+ _buffer = buffer;
+ _size = size;
+ }
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+ size_t total_size() const override { return _size; }
+ size_t calcOffset(const ir::Coordinates &coords) const override;
+ ir::Layout layout() const override { return _layout; }
+ ir::DataType data_type() const override { return _info.typeInfo().type(); }
+ bool is_dynamic() const override { return _dynamic; }
+ void set_dynamic() override { _dynamic = true; }
+ ir::Shape getShape() const override { return _info.shape(); }
+ void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
+ bool is_constant() const override { return false; }
+ bool applyShape(const ir::Shape &) override;
+
+private:
+ ir::Layout _layout;
+ uint8_t *_buffer;
+ size_t _size;
+ bool _dynamic;
+};
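+
+ // Minimal sketch of wrapping a caller-owned buffer; the parameters are assumed to describe that
+ // buffer. UserTensor never allocates or frees the memory it wraps, and applyShape() only
+ // succeeds while the new shape still fits in the wrapped size.
+ inline size_t wrapUserBuffer(const ir::OperandInfo &info, ir::Layout layout, uint8_t *data,
+                              size_t size)
+ {
+   UserTensor tensor{info, layout, data, size};
+   // Accessors read straight from the wrapped buffer; nothing is copied or owned
+   return tensor.total_size(); // equals the wrapped size
+ }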
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IfLayer.h"
+
+#include <backend/ITensor.h>
+#include "exec/ExecutorBase.h"
+#include "PermuteLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
+ exec::ExecutorMap *executor_map,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
+ _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index},
+ _executor_map{executor_map}, _external_context{external_context}
+{
+ // At this point, executor_map may not have executors of then subg and else subg
+}
+
+void IfLayer::run()
+{
+ // Check condition
+ // If true
+ //   Set _input_tensors -> then-subg's inputs
+ //   Set outputs of then-subg -> _output_tensors
+ //   Run then-subg
+ // Else
+ //   Set _input_tensors -> else-subg's inputs
+ //   Set outputs of else-subg -> _output_tensors
+ //   Run else-subg
+
+ auto getResultCond = [](backend::IPortableTensor *tensor) -> bool {
+ bool ret = false;
+ tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
+ return ret;
+ };
+
+ exec::IExecutor *subg_exec = nullptr;
+ bool cond_result = getResultCond(_cond_tensor);
+ if (cond_result)
+ {
+ VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
+ subg_exec = _executor_map->at(_then_subg_index).get();
+ }
+ else
+ {
+ VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
+ subg_exec = _executor_map->at(_else_subg_index).get();
+ }
+
+ subg_exec->execute(_input_tensors, _output_tensors);
+ VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index)
+ << std::endl;
+}
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include <exec/IExecutor.h>
+#include "../ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+class IfLayer : public ::onert::exec::IFunction
+{
+public:
+ IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
+ exec::ExecutorMap *executor_map,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+public:
+ void run() override;
+
+private:
+ backend::IPortableTensor *_cond_tensor;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
+ const ir::SubgraphIndex _then_subg_index;
+ const ir::SubgraphIndex _else_subg_index;
+ exec::ExecutorMap *_executor_map;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermuteLayer.h"
+
+#include "exec/ShapeConverter.h"
+
+#include "ruy/context.h" // from @ruy
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors,
+ const std::vector<ITensor *> &dst_tensors,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _external_context{external_context}, _tasks_map{}
+{
+ assert(src_tensors.size() == dst_tensors.size());
+ _src_tensors = src_tensors;
+ _dst_tensors = dst_tensors;
+ _src_tensors_offsets.resize(src_tensors.size());
+ _dst_tensors_offsets.resize(dst_tensors.size());
+}
+
+void PermuteLayer::optimize()
+{
+ // Skip copies where the source and destination are the same tensor or either one is nullptr
+ auto src_it = _src_tensors.begin();
+ auto dst_it = _dst_tensors.begin();
+ auto src_offsets_it = _src_tensors_offsets.begin();
+ auto dst_offsets_it = _dst_tensors_offsets.begin();
+ while (src_it != _src_tensors.end())
+ {
+ if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr))
+ {
+ src_it = _src_tensors.erase(src_it);
+ dst_it = _dst_tensors.erase(dst_it);
+ src_offsets_it = _src_tensors_offsets.erase(src_offsets_it);
+ dst_offsets_it = _dst_tensors_offsets.erase(dst_offsets_it);
+ }
+ else
+ {
+ auto src = *src_it;
+ auto dst = *dst_it;
+ src_offsets_it->resize(0);
+ dst_offsets_it->resize(0);
+ if (underlying_type(src->data_type()) != underlying_type(dst->data_type()))
+ throw std::runtime_error("data type does not match");
+ const auto permute_type = [&]() -> PermuteType {
+ if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NHWC &&
+ dst->layout() == ir::Layout::NCHW)
+ {
+ return PermuteType::NHWC_TO_NCHW;
+ }
+ else if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NCHW &&
+ dst->layout() == ir::Layout::NHWC)
+ {
+ return PermuteType::NCHW_TO_NHWC;
+ }
+ else
+ {
+ return PermuteType::COPY;
+ }
+ }();
+ auto fn = [&](backend::ITensor &src_tensor) {
+ dst->access([&](backend::ITensor &dst_tensor) {
+ // NOTE The buffers of both tensors can be nullptr in this step
+ const auto data_size = ir::sizeOfDataType(src_tensor.data_type());
+
+ if (permute_type == PermuteType::COPY)
+ {
+ if ((!src_tensor.has_padding() && !dst_tensor.has_padding()))
+ {
+ const auto num_elements = src_tensor.getShape().num_elements();
+ const int thread_count =
+ _external_context->ruy_context()->max_num_threads() < static_cast<int>(num_elements)
+ ? _external_context->ruy_context()->max_num_threads()
+ : num_elements;
+
+ std::vector<PermuteWorkerTask> tasks;
+ auto start = 0;
+ for (auto i = 0; i < thread_count; ++i)
+ {
+ int end = start + (num_elements - start) / (thread_count - i);
+ tasks.emplace_back(src_tensor.buffer(), dst_tensor.buffer(), start * data_size,
+ start * data_size, (end - start) * data_size);
+ start = end;
+ }
+ assert(tasks.size() >= 1);
+ _tasks_map[src] = std::move(tasks);
+ }
+ else
+ {
+ auto loop_shape = src_tensor.getShape();
+
+ auto copy_axis = loop_shape.rank() - 1;
+ copy_axis = copy_axis < 0 ? 1 : copy_axis;
+ const auto copy_len = loop_shape.dim(copy_axis) * data_size;
+ loop_shape.dim(copy_axis) = 1;
+
+ appendPermuteTasks(src, dst, loop_shape, copy_len);
+ }
+ }
+ else
+ {
+ assert(src_tensor.getShape().rank() == 4 &&
+ (permute_type == PermuteType::NHWC_TO_NCHW ||
+ permute_type == PermuteType::NCHW_TO_NHWC));
+ const auto loop_shape = src_tensor.getShape();
+ const auto copy_len = data_size;
+
+ appendPermuteTasks(src, dst, loop_shape, copy_len);
+ }
+ });
+ };
+ src->access(fn);
+ src_it++;
+ dst_it++;
+ src_offsets_it++;
+ dst_offsets_it++;
+ }
+ }
+}
+
+void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
+ const ir::Shape &loop_shape, size_t size)
+{
+ size_t distributed_dim = 0;
+ auto src_shape = src_tensor->getShape();
+ if (src_tensor->layout() == dst_tensor->layout())
+ {
+ for (int i = 1; i < src_shape.rank() - 1; ++i)
+ {
+ distributed_dim = src_shape.dim(distributed_dim) < src_shape.dim(i) ? i : distributed_dim;
+ }
+ }
+ const auto distributed_dim_val = src_shape.dim(distributed_dim);
+ const int thread_count =
+ _external_context->ruy_context()->max_num_threads() < static_cast<int>(distributed_dim_val)
+ ? _external_context->ruy_context()->max_num_threads()
+ : distributed_dim_val;
+ // NOTE Do not remove this assertion: exceeding the limit would degrade performance by creating
+ // new threads in the context's thread pool
+ assert(thread_count <= _external_context->ruy_context()->max_num_threads());
+
+ std::vector<PermuteWorkerTask> tasks;
+ int start = 0;
+ auto one_thread_loop_shape = loop_shape;
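+ // Split the distributed dimension as evenly as possible across threads. For example, with
+ // distributed_dim_val = 10 and thread_count = 4 the chunk sizes become 2, 2, 3 and 3.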
+ for (auto i = 0; i < thread_count; ++i)
+ {
+ ir::Coordinates start_coords(one_thread_loop_shape.rank());
+ start_coords.set(distributed_dim, start);
+ int end = start + (distributed_dim_val - start) / (thread_count - i);
+ one_thread_loop_shape.dim(distributed_dim) = end - start;
+ tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
+ start = end;
+ }
+ assert(tasks.size() >= 1);
+ _tasks_map[src_tensor] = std::move(tasks);
+}
+
+void PermuteLayer::runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer)
+{
+ assert(src->getShape().num_elements() * ir::sizeOfDataType(src->data_type()) <=
+ src->total_size());
+ std::vector<PermuteWorkerTask> &tasks = _tasks_map.at(src);
+ for (size_t i = 0; i < tasks.size(); ++i)
+ {
+ tasks.at(i).setBuffers(src->buffer(), dst_buffer);
+ }
+ assert(tasks.size() >= 1);
+ _external_context->ruy_context()->mutable_thread_pool()->Execute(tasks.size(), tasks.data());
+}
+
+void PermuteLayer::run()
+{
+ assert(_src_tensors.size() == _dst_tensors.size());
+ // PermuteLayer infers dynamic shape inside itself whenever run is called for the following
+ // reasons:
+ // 1. PermuteLayer has to access dynamic tensor manager for input/output tensors of other backends
+ // 2. Other controlflow operations (If/While) use this layer for copying tensors of other
+ //    subgraphs (with other backends)
+ // 3. This inferring code is placed here to avoid duplicated code caused by the above two
+ //    reasons
+
+ // check if output is not dynamic
+ for (size_t i = 0; i < _src_tensors.size(); ++i)
+ {
+ auto dst_tensor = _dst_tensors.at(i);
+ auto src_tensor = _src_tensors.at(i);
+ if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
+ {
+ // getting output shape
+ auto src_shape = src_tensor->getShape();
+
+ // set output shape and output buffer
+ ir::Shape new_shape =
+ exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout());
+
+ try
+ {
+ if (!dst_tensor->applyShape(new_shape))
+ throw std::runtime_error{
+ "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
+ assert(dst_tensor->buffer() != nullptr);
+ }
+ catch (const std::out_of_range &e)
+ {
+ std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support "
+ "dynamic tensor"
+ << '\n';
+ throw;
+ }
+ }
+ assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) ==
+ dst_tensor->getShape());
+ }
+ assert(_src_tensors.size() == _dst_tensors.size());
+ assert(_src_tensors.size() == _src_tensors_offsets.size());
+ assert(_dst_tensors.size() == _dst_tensors_offsets.size());
+ auto src_it = _src_tensors.begin();
+ auto dst_it = _dst_tensors.begin();
+ auto src_offsets_it = _src_tensors_offsets.begin();
+ auto dst_offsets_it = _dst_tensors_offsets.begin();
+ while (src_it != _src_tensors.end())
+ {
+ auto src = *src_it;
+ auto dst = *dst_it;
+ auto &src_offsets = *src_offsets_it;
+ auto &dst_offsets = *dst_offsets_it;
+
+ if (src->total_size() == 0)
+ {
+ assert(dst->total_size() == 0);
+ }
+ else
+ {
+ if (src != dst)
+ {
+ // Conditions to run permutation with multithreading
+ // 1. The tasks for multithreading were created
+ // 2. There is more than one task
+ // 3. Both tensors are not dynamic
+ if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
+ src->is_dynamic() || dst->is_dynamic())
+ {
+ permute(src, dst, src->getShape().rank(), src_offsets, dst_offsets);
+ }
+ // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
+ else if (dst->needMemoryMap() && !dst->is_subtensor())
+ {
+ if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
+ {
+ // This is more effective than multi-threading
+ src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
+ }
+ else
+ {
+ // TODO Optimize this block in case of that padding size of dst is big.
+ _buffers_map[dst].reserve(dst->total_size());
+ auto dst_buffer = _buffers_map[dst].data();
+
+ src->access([&](backend::ITensor &) { runPermuteTasks(src, dst_buffer); });
+ dst->enqueueWriteBuffer(dst_buffer, false);
+ }
+ }
+ else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
+ !dst->has_padding() && src->layout() == dst->layout())
+ {
+        // This is more efficient than multi-threading
+ assert(!dst->needMemoryMap());
+ dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
+ }
+ else
+ {
+ auto fn = [&](backend::ITensor &) {
+ dst->access([&](backend::ITensor &) { runPermuteTasks(src, dst->buffer()); });
+ };
+ src->access(fn);
+ }
+ }
+ }
+ src_it++;
+ dst_it++;
+ src_offsets_it++;
+ dst_offsets_it++;
+ }
+}
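+
+// A minimal usage sketch (hypothetical tensors; external_context is assumed to be a
+// std::shared_ptr<ExternalContext>; see PermuteLayer.h for the actual interface):
+//
+//   std::vector<ITensor *> srcs{src_tensor};
+//   std::vector<ITensor *> dsts{dst_tensor};
+//   PermuteLayer permute{srcs, dsts, external_context};
+//   permute.optimize(); // presumably pre-builds the per-source worker tasks
+//   permute.run();      // copies/permutes each src buffer into the matching dst buffer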
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
+
+#include "exec/IPermuteFunction.h"
+#include "exec/IExecutor.h"
+#include "../ExternalContext.h"
+#include "ruy/thread_pool.h" // from @ruy
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+class PermuteLayer : public onert::exec::IPermuteFunction
+{
+public:
+ PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void optimize() override;
+
+ void run() override;
+
+private:
+ std::shared_ptr<ExternalContext> _external_context;
+
+private:
+ void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
+ const ir::Shape &loop_shape, size_t size);
+
+ void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);
+
+ struct PermuteWorkerTask : ruy::Task
+ {
+ using Strides = ir::Coordinates;
+
+ PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
+ const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
+ : _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
+ _src_start_offset{src_tensor.calcOffset(start_coords)},
+ _dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
+ _loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
+ _dst_layout{dst_tensor.layout()}, _is_permutation{true}
+ {
+ // Set strides
+ setStrides(src_tensor, &_src_strides);
+ setStrides(dst_tensor, &_dst_strides);
+
+ _is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
+ }
+ // Constructor for a copy
+ PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
+ uint32_t dst_start_offset, size_t size)
+ : _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
+ _dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0},
+ _loop_shape{1}, _size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
+ {
+ // DO NOTHING
+ }
+ void setBuffers(const uint8_t *src_buffer, uint8_t *dst_buffer)
+ {
+ _src_buffer = src_buffer;
+ _dst_buffer = dst_buffer;
+ }
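+    // Visits every coordinate of _loop_shape and copies a contiguous block of _size bytes per
+    // step; when _is_permutation is set, the destination coordinate is first translated from
+    // the source layout to the destination layout before the offsets are accumulated.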
+ void Run() override
+ {
+ ShapeLoop(_loop_shape, [&](const onert::ir::Coordinates &coords) {
+ size_t src_offset = _src_start_offset;
+ size_t dst_offset = _dst_start_offset;
+ assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
+ ir::Coordinates dst_coords = coords;
+ if (_is_permutation)
+ {
+ dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
+ }
+ for (auto i = 0; i < _loop_shape.rank(); ++i)
+ {
+ assert(coords[i] >= 0 && dst_coords[i] >= 0);
+ src_offset += coords[i] * _src_strides[i];
+ dst_offset += dst_coords[i] * _dst_strides[i];
+ }
+ memcpy(_dst_buffer + dst_offset, _src_buffer + src_offset, _size);
+ });
+ }
+
+ private:
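+    // The stride of dimension i is computed in bytes as the offset difference between the
+    // origin and the coordinate that steps dimension i by one, so calcOffset() can account
+    // for any padding in the tensor's layout.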
+ void setStrides(const ITensor &tensor, Strides *strides)
+ {
+ auto shape = tensor.getShape();
+ const size_t rank = shape.rank();
+ for (size_t i = 0; i < rank; ++i)
+ {
+ ir::Coordinates no_step(rank), one_step(rank);
+ one_step.set(i, 1);
+ if (shape.dim(i) > 1)
+ {
+ strides->set(i, tensor.calcOffset(one_step) - tensor.calcOffset(no_step));
+ }
+ else
+ {
+        // If the dimension value is 0 or 1, the stride of the dimension will not be used
+        // Do not call calcOffset() with a coordinate value greater than the dimension value
+ strides->set(i, 0);
+ }
+ assert((*strides)[i] >= 0);
+ }
+ }
+
+ private:
+ const uint8_t *_src_buffer;
+ uint8_t *_dst_buffer;
+ size_t _src_start_offset;
+ size_t _dst_start_offset;
+ Strides _src_strides;
+ Strides _dst_strides;
+ const ir::Shape _loop_shape;
+ const size_t _size;
+ const ir::Layout _src_layout;
+ const ir::Layout _dst_layout;
+ bool _is_permutation;
+ };
+ std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
+};
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WhileLayer.h"
+
+#include <algorithm>
+#include <backend/ITensor.h>
+#include "exec/ExecutorBase.h"
+#include <misc/polymorphic_downcast.h>
+#include "PermuteLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &cond_subg_index,
+ const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map,
+ basic::DynamicMemoryManager *dyn_memory_manager,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
+ _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map},
+ _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
+{
+ // At this point, executor_map may not have executors of cond subg and body subg
+}
+
+void WhileLayer::run()
+{
+  // Copy "_input_tensors" -> "cond subg inputs"
+  // Run cond subg
+  // Loop while the output of cond subg is true
+  // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy "body subg
+  // // outputs" -> "body subg inputs" from the second iteration on
+  // // Run body subg
+  // // Copy "body subg outputs" -> "cond subg inputs"
+  // // Run cond subg
+  // If the loop body never runs, copy "_input_tensors" -> "_output_tensors"; otherwise copy
+  // "cond subg inputs" -> "_output_tensors"
+ auto cond_exec = _executor_map->at(_cond_subg_index).get();
+ auto body_exec = _executor_map->at(_body_subg_index).get();
+
+ // Need a temp tensor to hold the cond subgraph output
+ assert(cond_exec->getOutputTensors().size() == 1);
+ auto cond_output_tensor = [&]() {
+ auto cond_output = cond_exec->getOutputTensors().at(0);
+ auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ return tensor;
+ }();
+
+ VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
+ cond_exec->execute(_input_tensors, {cond_output_tensor.get()});
+ VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
+
+ auto getResultCond = [](backend::ITensor *tensor) -> bool {
+ bool ret = false;
+ tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
+ return ret;
+ };
+
+ std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end());
+ std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end());
+  // Copy the body inputs straight to the op outputs when the loop body is never executed
+ if (!getResultCond(cond_output_tensor.get()))
+ {
+ PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context};
+ copy_body_inputs_to_op_outputs.run();
+ return;
+ }
+
+  // Need some temp tensors to hold the body subgraph outputs
+ std::vector<std::unique_ptr<Tensor>> temp_outputs_o;
+ std::vector<IPortableTensor *> temp_outputs;
+ for (auto io_tensor : body_exec->getOutputTensors())
+ {
+ auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ temp_outputs.push_back(tensor.get());
+ temp_outputs_o.push_back(std::move(tensor));
+ }
+
+ std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end());
+ PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context};
+
+ const auto body_execute_with_op_inputs = [&]() {
+ VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
+ body_exec->execute(_input_tensors, temp_outputs);
+ VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
+ };
+
+ const auto body_execute_with_body_outputs = [&]() {
+ VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
+ body_exec->execute(_output_tensors, temp_outputs);
+ VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
+ };
+
+ std::function<void()> body_execute = body_execute_with_op_inputs;
+ const auto cond_execute = [&]() {
+ VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
+ cond_exec->execute(_output_tensors, {cond_output_tensor.get()});
+ VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
+ };
+
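+  // NOTE From the second iteration on, "_output_tensors" carry the loop state: the body reads
+  // them as its inputs, and copy_body_outputs_to_op_outputs writes the next values back into
+  // them before cond runs again.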
+ // Loop while Cond subgraph's output is true
+ while (getResultCond(cond_output_tensor.get()))
+ {
+ body_execute();
+ copy_body_outputs_to_op_outputs.run();
+ cond_execute();
+ body_execute = body_execute_with_body_outputs;
+ }
+
+ // Clean-up the temp tensors
+ _dyn_memory_manager->deallocate(cond_output_tensor.get());
+ for (auto tensor : temp_outputs)
+ {
+ _dyn_memory_manager->deallocate(tensor);
+ }
+}
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include <exec/IExecutor.h>
+#include <exec/IFunction.h>
+#include <ir/OperandIndexSequence.h>
+#include <ir/Graph.h>
+#include "../ExternalContext.h"
+
+#include "backend/basic/MemoryManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+class WhileLayer : public ::onert::exec::IFunction
+{
+public:
+ WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
+ exec::ExecutorMap *executor_map, basic::DynamicMemoryManager *dyn_memory_manager,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+public:
+ void run() override;
+
+private:
+ const ir::SubgraphIndex _cond_subg_index;
+ const ir::SubgraphIndex _body_subg_index;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
+ exec::ExecutorMap *_executor_map;
+ basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
-#define __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
-
-#include "BackendContext.h"
-#include "Config.h"
-#include "ConstantInitializer.h"
-#include "KernelGenerator.h"
-#include "TensorBuilder.h"
-#include "Tensor.h"
-
-#include <backend/Backend.h>
-
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class Backend : public ::onert::backend::Backend
-{
-public:
- Backend() : _config{std::make_shared<Config>()} {}
-
- std::shared_ptr<IConfig> config() const override { return _config; }
-
- std::unique_ptr<onert::backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
- bool) const override
- {
- const auto &operands = graph.operands();
- auto context = std::make_unique<BackendContext>(this, &graph);
- // ControlFlow backend may not build tensors for itself because the backend's operation uses
- // tensors of other baceknd instead
- // But the backend builds tensors in case of that the controlflow operation may have constant
- // input or that consecutive controflow operations exist. We have to make them not to be built
- // later
- // 1. Constant input
- // These tensors cannot be dynamic tensor, so let's do it as follows:
- // - always skip copying
- // - if it is operation's input in child subgraph: register "use" as constant input of the
- // operations in child subgraph
- // - if it is child subgraph's output: register "use" as constant input of the operations
- // using it
- // 2. Consecutive controflow operation's intermediate tensor
- // These tensors can be dynamic tensor and this is complicated to support without copying. But
- // there is no such case until now, let's support it later
- // TODO Remove TensorBuilder and ConstantInitializer
- // TODO Support Consecutive controflow operation's intermediate tensor
- auto tr = std::make_shared<TensorRegistry>();
- auto tb = std::make_shared<TensorBuilder>(tr);
- context->tensor_registry = tr;
- context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr,
- context->external_context());
- return context;
- }
-
-private:
- std::shared_ptr<IConfig> _config;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "BackendContext.h"
-
-#include "KernelGenerator.h"
-#include "backend/cpu_common/BackendContextHelpers.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-void BackendContext::initConsts()
-{
- for (auto &op : operation_list())
- {
- constant_initializer->setLayout(op.layout);
- graph()->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : operand_list())
- {
- const auto &obj = graph()->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
-ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info)
-{
- auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
- ir::Remove::DUPLICATED;
- for (auto index : operand_list())
- {
- if (model_io.contains(index))
- continue;
- const auto &obj = graph()->operands().at(index);
- const auto frontend_layout = [&]() {
- if (obj.getUses().size() == 0)
- return ir::Layout::UNKNOWN;
- auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
- for (auto &operation_info : operation_list())
- {
- if (operation_info.index == use_op_ind)
- return operation_info.layout;
- }
- return ir::Layout::UNKNOWN;
- }();
- const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
- if (permute_factor.backend() != backend())
- continue;
- const auto backend_layout = permute_factor.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
-
- // TODO Get compiler options from compiler, and use it rather than getting it from Env
- if (util::getConfigString(util::config::EXECUTOR) == "Linear")
- {
- cpu_common::planTensors(*this, order, op_seqs, lower_info);
- }
- else
- {
- // For the executors that does not have fixed linear execution order:
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto ind : operand_list())
- {
- if (tensor_builder->isRegistered(ind))
- tensor_builder->notifyFirstUse(ind);
- }
- }
-
- tensor_builder->prepare();
-
- return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels(const std::vector<ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs)
-{
- FunctionMap ret;
-
- for (auto op_seq_ind : order)
- {
- const auto &op_seq = op_seqs.at(op_seq_ind);
- bool assigned = [&]() {
- for (auto op_info : operation_list())
- if (op_seq.exist(op_info.index))
- return true;
- return false;
- }();
- if (!assigned)
- continue;
- auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
- ret.emplace_back(op_seq_ind, std::move(fn_seq));
- }
-
- initConsts();
-
- // NOTE For memory optimization, we want to free some operand data
- for (auto ind : operand_list())
- {
- // TODO Remove const_cast
- auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
- obj.releaseData();
- }
-
- for (auto &it : ret)
- {
- auto &fn_seq = it.second;
- fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
- }
-
- return ret;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__
-#define __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__
-
-#include <backend/BackendContext.h>
-#include "TensorBuilder.h"
-#include "ConstantInitializer.h"
-#include "KernelGenerator.h"
-#include "ExternalContext.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class BackendContext : public onert::backend::BackendContext
-{
-public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
- std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry),
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, _external_context(std::make_shared<ExternalContext>())
- {
- }
-
- ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs,
- const ir::LowerInfoMap &lower_info) override;
-
- FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs) override;
-
- std::shared_ptr<ExternalContext> external_context() { return _external_context; }
-
-private:
- void initConsts();
- void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
- const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
-
-public:
- // TODO Make it private
- std::shared_ptr<TensorBuilder> tensor_builder;
- std::shared_ptr<ConstantInitializer> constant_initializer;
- std::shared_ptr<KernelGenerator> kernel_gen;
-
-private:
- // NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
- // the thread pool is also created in duplicate
- // TODO Create one ruy context for session
- std::shared_ptr<ExternalContext> _external_context;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Config.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-std::string Config::ID = "controlflow";
-
-bool Config::initialize() { return true; }
-
-ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout frontend_layout)
-{
- return frontend_layout;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
-#define __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
-
-#include <backend/IConfig.h>
-#include <memory>
-#include <util/ITimer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class Config : public IConfig
-{
-public:
- static std::string ID;
- std::string id() override { return ID; }
- bool initialize() override;
- ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
- bool supportPermutation() override { return false; }
- bool supportDynamicTensor() override
- {
- // TODO Make this backend to support dynamic tensor or not to build non-constant tensor
- return true;
- }
- bool supportFP16() override { return false; }
-
- std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-
-#include <backend/cpu_common/ConstantInitializer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-using ConstantInitializer = cpu_common::ConstantInitializer;
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-
-#include "TensorRegistry.h"
-#include "Tensor.h"
-
-#include <backend/cpu_common/DynamicTensorManager.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-using DynamicTensorManager = cpu_common::DynamicTensorManager;
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
-#define __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
-
-#include <util/ConfigSource.h>
-
-#include <ruy/context.h>
-#include <ruy/context_get_ctx.h>
-#include <ruy/ctx.h>
-#include <ruy/tune.h>
-
-namespace
-{
-const int kDefaultNumThreadpoolThreads = 1;
-}
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-// TODO Unify this with cpu::ExternalContext
-class ExternalContext
-{
-public:
- ExternalContext() : _ruy_context(std::make_unique<ruy::Context>())
- {
- setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
- initPerThreadState();
- }
-
- void setMaxNumThreads(int max_num_threads)
- {
- const int target_num_threads =
- max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
- _ruy_context->set_max_num_threads(target_num_threads);
- }
-
- ruy::Context *ruy_context() const { return _ruy_context.get(); }
-
-private:
- void initPerThreadState()
- {
- // Initialize per-thread state.
- const int thread_count = _ruy_context->max_num_threads();
- auto ctx = ruy::get_ctx(_ruy_context.get());
- ctx->EnsureThreadSpecificResources(thread_count);
- for (int i = 0; i < thread_count; i++)
- {
- ctx->GetThreadSpecificTuningResolver(i)->SetTuning(ctx->explicit_tuning());
- }
- }
-
-private:
- const std::unique_ptr<ruy::Context> _ruy_context;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "IOTensor.h"
-
-#include <assert.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout)
- : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout}
-{
- setUserTensor(nullptr, 0);
-}
-
-void IOTensor::setTensor(IPortableTensor *tensor)
-{
- assert(tensor);
- assert(tensor != this);
- // TODO Handle when layout was changed
- assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet
- _user_tensor.reset();
- _tensor = tensor;
-}
-
-void IOTensor::setUserTensor(uint8_t *buffer, size_t size)
-{
- _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size);
- _tensor = _user_tensor.get();
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
-
-#include "backend/IPortableTensor.h"
-#include "UserTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Tensor object that indirects to the tensor it is pointing to.
- *
- * A model I/O tensor could be two types.
- *
- * 1. @c UserTensor, if it is the primary graph
- * 2. Any other derivative of @c IPortableTensor from another backend, otherwise
- *
- * To support these, this object indirects everything to the actual tensor pointer.
- * Exceptionally if it is UserTensor, this class creates and manages it.
- */
-class IOTensor : public IPortableTensor
-{
-public:
- IOTensor(const ir::OperandInfo &info, ir::Layout layout);
-
-public:
- void setTensor(IPortableTensor *tensor);
- void setUserTensor(uint8_t *buffer, size_t size);
- ir::OperandInfo orig_info() const { return _orig_info; }
- ir::Layout orig_layout() const { return _orig_layout; }
-
-public:
- uint8_t *buffer() const override { return _tensor->buffer(); }
- size_t total_size() const override { return _tensor->total_size(); }
- size_t dimension(size_t index) const override { return _tensor->dimension(index); }
- size_t num_dimensions() const override { return _tensor->num_dimensions(); }
- size_t calcOffset(const ir::Coordinates &coords) const override
- {
- return _tensor->calcOffset(coords);
- }
- ir::Layout layout() const override { return _tensor->layout(); }
- ir::DataType data_type() const override { return _tensor->data_type(); }
- float data_scale() const override { return _tensor->data_scale(); }
- int32_t data_offset() const override { return _tensor->data_offset(); }
- bool is_dynamic() const override { return _is_dynamic || (_tensor && _tensor->is_dynamic()); }
- void set_dynamic() override { _is_dynamic = true; }
- ir::Shape getShape() const override { return _tensor->getShape(); }
- void setShape(const ir::Shape &shape) override
- {
- // Workaround for IPortableTensor holds _info as its member
- _info.shape(shape);
- _tensor->setShape(shape);
- }
- bool is_constant() const override { return _tensor->is_constant(); }
- bool applyShape(const ir::Shape &shape) override
- {
- // Workaround for IPortableTensor holds _info as its member
- _info.shape(shape);
- return _tensor->applyShape(shape);
- }
-
-private:
- const ir::OperandInfo _orig_info;
- const ir::Layout _orig_layout;
- bool _is_dynamic{false};
- IPortableTensor *_tensor{nullptr}; //< The actual tensor that is indirected
- std::unique_ptr<UserTensor> _user_tensor; //< If it is a user tensor, it is managed by this object
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "KernelGenerator.h"
-
-#include <backend/BackendContext.h>
-#include <util/Utils.h>
-#include "kernel/IfLayer.h"
-#include "kernel/WhileLayer.h"
-#include "kernel/PermuteLayer.h"
-#include "exec/ExecutorBase.h"
-#include "exec/FunctionSequence.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
- const std::shared_ptr<TensorRegistry> &tensor_reg,
- const std::shared_ptr<ExternalContext> &external_context)
- : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
- _tensor_registries{}, _executor_map{nullptr}, _external_context{external_context}
-{
- UNUSED_RELEASE(_graph);
- UNUSED_RELEASE(_tensor_registries);
- UNUSED_RELEASE(_executor_map);
-}
-
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- assert(!_return_fn_seq);
- assert(_dyn_tensor_manager);
- assert(_tensor_reg);
-
- auto dyn_shape_inferer =
- std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
-
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
- // Prepare to handle dynamic tensors later
- auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
- {
- dyn_ctx->op_seq = &op_seq;
- dyn_ctx->operations = &_graph.operations();
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager;
-
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
- }
-
- for (const auto &op_idx : op_seq.operations())
- {
- const auto &node = _graph.operations().at(op_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
- }
-}
-
-void KernelGenerator::visit(const ir::operation::If &node)
-{
- const auto then_subg_index = node.param().then_subg_index;
- const auto else_subg_index = node.param().else_subg_index;
-
- std::vector<backend::IPortableTensor *> input_tensors;
- for (const auto input_index : node.getInputs())
- {
- auto input_tensor = getPortableTensor(input_index);
- input_tensors.emplace_back(input_tensor);
- }
-
- std::vector<backend::IPortableTensor *> output_tensors;
- for (const auto output_index : node.getOutputs())
- {
- auto output_tensor = getPortableTensor(output_index);
- output_tensors.emplace_back(output_tensor);
- }
-
- // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of
- // creating executor recusively
- const auto cond_tensor = input_tensors.front();
- input_tensors.erase(input_tensors.begin());
- auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>(
- cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
- _external_context);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Permute &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- // Add PermuteLayer
- std::vector<ITensor *> output_tensors{getTensor(output_index)};
- std::vector<ITensor *> input_tensors{getTensor(input_index)};
-
- auto fn =
- std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, _external_context);
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::While &node)
-{
- const auto cond_subg_index = node.param().cond_subg_index;
- const auto body_subg_index = node.param().body_subg_index;
-
- // This op does not support input as a constant, because controlflow backend does not have
- // TensorBuilder
- std::vector<backend::IPortableTensor *> input_tensors;
- for (const auto input_index : node.getInputs())
- {
- auto input_tensor = getPortableTensor(input_index);
- input_tensors.emplace_back(input_tensor);
- }
-
- std::vector<backend::IPortableTensor *> output_tensors;
- for (const auto output_index : node.getOutputs())
- {
- auto output_tensor = getPortableTensor(output_index);
- output_tensors.emplace_back(output_tensor);
- }
-
- // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of
- // creating executor recusively
- auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>(
- input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
- _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
-
- _return_fn = std::move(fn);
-}
-
-backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
-{
- // get Tensor from all tensor registries (for Permute op)
- auto ret = _tensor_registries.getITensor(index);
- assert(ret != nullptr);
- return ret;
-}
-
-backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index)
-{
- auto ret = _tensor_reg->getPortableTensor(index);
- assert(ret != nullptr);
- return ret;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
-
-#include <exec/IExecutor.h>
-#include "ExternalContext.h"
-#include <ir/Graph.h>
-#include "TensorBuilder.h"
-#include "compiler/TensorRegistries.h"
-#include "backend/cpu_common/KernelGeneratorBase.h"
-#include "TensorRegistry.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class KernelGenerator : public cpu_common::KernelGeneratorBase
-{
-public:
- KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
- const std::shared_ptr<TensorRegistry> &tensor_reg,
- const std::shared_ptr<ExternalContext> &external_context);
-
- void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
- {
- _tensor_registries = tensor_registries;
- }
- void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
- {
- // FIXME Using shared_ptr's raw pointer!
- _executor_map = executor_map.get();
- }
-
- void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::If &) override;
- void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::While &) override;
-
-private:
- backend::ITensor *getTensor(const ir::OperandIndex &index);
- backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index);
-
-private:
- const ir::Graph &_graph;
- DynamicTensorManager *_dyn_tensor_manager;
- std::shared_ptr<TensorRegistry> _tensor_reg;
- compiler::TensorRegistries _tensor_registries;
- exec::ExecutorMap *_executor_map;
- const std::shared_ptr<ExternalContext> _external_context;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-
-#include <backend/cpu_common/Tensor.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-using Tensor = cpu_common::Tensor;
-using ExternalTensor = cpu_common::ExternalTensor;
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <util/logging.h>
-
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg},
- _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
- _static_tensor_mgr{
- new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
-{
- /* empty */
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout)
-{
- _tensor_info_map.emplace(ind, info);
-
- _tensor_layout_map.insert({ind, backend_layout});
-
- if (info.isDynamic())
- {
- _dynamic_tensor_mgr->buildTensor(ind, info, _tensor_layout_map[ind]);
- }
- else
- {
- _static_tensor_mgr->buildTensor(ind, info, _tensor_layout_map[ind], info.isConstant());
- }
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- // TODO Enhance the way of checking user tensors
- if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
- return;
-
- const auto tensor_info = _tensor_info_map.at(ind);
-
- if (!nativeOwnTensorAt(ind)->is_dynamic())
- {
- const auto size = tensor_info.total_size();
- _static_tensor_mgr->claimPlan(ind, size);
- }
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
-{
- // TODO Enhance the way of checking user tensors
- if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
- return;
-
- if (!nativeOwnTensorAt(ind)->is_dynamic())
- {
- _static_tensor_mgr->releasePlan(ind);
- }
-}
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- // User tensors are not registered in _tensor_info_map but objects for them are exist
- // in the tensor registry.
- // TODO Enhance the way of checking user tensors
- if (_tensor_reg->getITensor(ind))
- return true;
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
-
-void TensorBuilder::allocate()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-DynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
-{
- return _dynamic_tensor_mgr.get();
-}
-
-cpu_common::Tensor *TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getNativeOwnTensor(ind);
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
-
-#include <backend/cpu_common/StaticTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/cpu_common/Tensor.h>
-
-#include <ir/OperandIndexMap.h>
-
-#include <unordered_map>
-
-#include "DynamicTensorManager.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class TensorBuilder
-{
-public:
- TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
-
- /**
- * @brief Register tensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout);
-
- void notifyFirstUse(const ir::OperandIndex &);
- void notifyLastUse(const ir::OperandIndex &);
-
- bool isRegistered(const ir::OperandIndex &) const;
-
- void prepare(void);
- void allocate();
- void postFunctionPrepare() { /* DO NOTHING */}
-
- DynamicTensorManager *dynamicTensorManager(void);
-
- /**
- * @brief Get tensor with a specific OperandIndex.
- * @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
- * If not, program will crash with assert or exception.
- * @return operand::Tensor *
- */
- cpu_common::Tensor *nativeOwnTensorAt(const ir::OperandIndex &ind);
-
-private:
- const std::shared_ptr<TensorRegistry> _tensor_reg;
- std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
- ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
-
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorRegistry.h"
-#include "Tensor.h"
-#include "IOTensor.h"
-#include <assert.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Tensor registry class for controlflow backend
- *
- * This class contains three types of tensors. Two native tensors(tensors that are managed by this
- * backend) and the other is migrant tensor.
- *
- * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _base_reg )
- * - NOTE The tensor it actually points to can be from another backend
- * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
- * - MigrantTensor - @c IPortableTensor managed by other backends
- *
- * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager
- *
- */
-class TensorRegistry : public ITensorRegistry
-{
-public:
- TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {}
-
- ITensor *getITensor(const ir::OperandIndex &ind) override
- {
- auto base_tensor = _base_reg->getITensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeIOTensor(ind);
- }
-
- ITensor *getNativeITensor(const ir::OperandIndex &ind) override
- {
- auto base_tensor = _base_reg->getNativeITensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeIOTensor(ind);
- }
-
- IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
- {
- auto base_tensor = _base_reg->getPortableTensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeIOTensor(ind);
- }
-
- IPortableTensor *getNativeTensor(const ir::OperandIndex &ind)
- {
- auto base_tensor = _base_reg->getNativeTensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeIOTensor(ind);
- }
-
- Tensor *getNativeOwnTensor(const ir::OperandIndex &ind)
- {
- return _base_reg->getNativeTensor(ind);
- }
-
- IOTensor *getNativeIOTensor(const ir::OperandIndex &ind)
- {
- auto tensor = _native_io_tensors.find(ind);
- if (tensor != _native_io_tensors.end())
- return tensor->second.get();
- return nullptr;
- }
-
- bool setMigrantTensor(const ir::OperandIndex &ind, IPortableTensor *tensor) override
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _base_reg->setMigrantTensor(ind, tensor);
- return true;
- }
-
- void setNativeOwnTensor(ir::OperandIndex ind, std::unique_ptr<Tensor> &&tensor)
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _base_reg->setNativeTensor(ind, std::move(tensor));
- }
-
- void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor)
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _native_io_tensors[ind] = std::move(tensor);
- }
-
- const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors()
- {
- return _native_io_tensors;
- }
- std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; }
-
-private:
- std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
- ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "UserTensor.h"
-
-#include "util/Exceptions.h"
-#include "ir/DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-size_t UserTensor::calcOffset(const ir::Coordinates &coords) const
-{
- size_t rank = num_dimensions();
- size_t offset = 0;
- for (size_t i = 0; i < rank; ++i)
- {
- offset = offset * dimension(i) + coords[i];
- }
- offset *= sizeOfDataType(data_type());
- return offset;
-}
-
-bool UserTensor::applyShape(const ir::Shape &new_shape)
-{
- // User tensors cannot be reallocated.
- auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
- if (total_size() < new_size)
- throw InsufficientBufferSizeException{"User given buffer size is too small."};
- setShape(new_shape);
- return true;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
-
-#include "ir/OperandInfo.h"
-#include "backend/IPortableTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Tensor object for the input and output tensors provided by the user.
- *
- * This class wraps a buffer allocated by the user, so it is responsible for neither allocation
- * nor deallocation. All model input/output tensors are wrapped with this class for execution.
- *
- */
-class UserTensor : public IPortableTensor
-{
-public:
- UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size)
- : IPortableTensor{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false}
- {
- }
-
- UserTensor(const ir::OperandInfo &info, ir::Layout layout) : UserTensor{info, layout, nullptr, 0}
- {
- }
-
-public:
- void setBuffer(uint8_t *buffer, size_t size)
- {
- _buffer = buffer;
- _size = size;
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
- size_t total_size() const override { return _size; }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override { return _layout; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
- bool is_dynamic() const override { return _dynamic; }
- void set_dynamic() override { _dynamic = true; }
- ir::Shape getShape() const override { return _info.shape(); }
- void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
- bool is_constant() const override { return false; }
- bool applyShape(const ir::Shape &) override;
-
-private:
- ir::Layout _layout;
- uint8_t *_buffer;
- size_t _size;
- bool _dynamic;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "IfLayer.h"
-
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include "PermuteLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
- const std::vector<backend::IPortableTensor *> input_tensors,
- const std::vector<backend::IPortableTensor *> output_tensors,
- const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map,
- const std::shared_ptr<ExternalContext> &external_context)
- : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index},
- _executor_map{executor_map}, _external_context{external_context}
-{
- // At this point, executor_map may not have executors of then subg and else subg
-}
-
-void IfLayer::run()
-{
- // Check condition
- // // If true
- // // // Set _input_tensors -> then-subg's inputs
- // // // Set outputs of then-subg -> _output_tensors
- // // // Run then-subg
- // // Else
- // // // Set _input_tensors -> else-subg's inputs
- // // // Set outputs of else-subg -> _output_tensors
- // // // Run else-subg
-
- auto getResultCond = [](backend::IPortableTensor *tensor) -> bool {
- bool ret = false;
- tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
- return ret;
- };
-
- exec::IExecutor *subg_exec = nullptr;
- bool cond_result = getResultCond(_cond_tensor);
- if (cond_result)
- {
- VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
- subg_exec = _executor_map->at(_then_subg_index).get();
- }
- else
- {
- VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
- subg_exec = _executor_map->at(_else_subg_index).get();
- }
-
- subg_exec->execute(_input_tensors, _output_tensors);
- VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index)
- << std::endl;
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include <exec/IExecutor.h>
-#include "../ExternalContext.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-class IfLayer : public ::onert::exec::IFunction
-{
-public:
- IfLayer(backend::IPortableTensor *cond_tensor,
- const std::vector<backend::IPortableTensor *> input_tensors,
- const std::vector<backend::IPortableTensor *> output_tensors,
- const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map,
- const std::shared_ptr<ExternalContext> &external_context);
-
-public:
- void run() override;
-
-private:
- backend::IPortableTensor *_cond_tensor;
- const std::vector<backend::IPortableTensor *> _input_tensors;
- const std::vector<backend::IPortableTensor *> _output_tensors;
- const ir::SubgraphIndex _then_subg_index;
- const ir::SubgraphIndex _else_subg_index;
- exec::ExecutorMap *_executor_map;
- const std::shared_ptr<ExternalContext> _external_context;
-};
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PermuteLayer.h"
-
-#include "exec/ShapeConverter.h"
-
-#include "ruy/context.h" // from @ruy
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors,
- const std::vector<ITensor *> &dst_tensors,
- const std::shared_ptr<ExternalContext> &external_context)
- : _external_context{external_context}, _tasks_map{}
-{
- assert(src_tensors.size() == dst_tensors.size());
- _src_tensors = src_tensors;
- _dst_tensors = dst_tensors;
- _src_tensors_offsets.resize(src_tensors.size());
- _dst_tensors_offsets.resize(dst_tensors.size());
-}
-
-void PermuteLayer::optimize()
-{
- // Remove entries where no copy is needed (src == dst) or either tensor is nullptr
- auto src_it = _src_tensors.begin();
- auto dst_it = _dst_tensors.begin();
- auto src_offsets_it = _src_tensors_offsets.begin();
- auto dst_offsets_it = _dst_tensors_offsets.begin();
- while (src_it != _src_tensors.end())
- {
- if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr))
- {
- src_it = _src_tensors.erase(src_it);
- dst_it = _dst_tensors.erase(dst_it);
- src_offsets_it = _src_tensors_offsets.erase(src_offsets_it);
- dst_offsets_it = _dst_tensors_offsets.erase(dst_offsets_it);
- }
- else
- {
- auto src = *src_it;
- auto dst = *dst_it;
- src_offsets_it->resize(0);
- dst_offsets_it->resize(0);
- if (underlying_type(src->data_type()) != underlying_type(dst->data_type()))
- throw std::runtime_error("data type does not match");
- const auto permute_type = [&]() -> PermuteType {
- if (src->num_dimensions() == 4 && src->layout() == ir::Layout::NHWC &&
- dst->layout() == ir::Layout::NCHW)
- {
- return PermuteType::NHWC_TO_NCHW;
- }
- else if (src->num_dimensions() == 4 && src->layout() == ir::Layout::NCHW &&
- dst->layout() == ir::Layout::NHWC)
- {
- return PermuteType::NCHW_TO_NHWC;
- }
- else
- {
- return PermuteType::COPY;
- }
- }();
- auto fn = [&](backend::ITensor &src_tensor) {
- dst->access([&](backend::ITensor &dst_tensor) {
- // NOTE The buffers of both tensors can still be nullptr at this step
- const auto data_size = ir::sizeOfDataType(src_tensor.data_type());
-
- if (permute_type == PermuteType::COPY)
- {
- if ((!src_tensor.has_padding() && !dst_tensor.has_padding()))
- {
- const auto num_elements = src_tensor.getShape().num_elements();
- const int thread_count = _external_context->ruy_context()->max_num_threads() <
- static_cast<int>(num_elements)
- ? _external_context->ruy_context()->max_num_threads()
- : num_elements;
-
- std::vector<PermuteWorkerTask> tasks;
- auto start = 0;
- for (auto i = 0; i < thread_count; ++i)
- {
- int end = start + (num_elements - start) / (thread_count - i);
- tasks.emplace_back(src_tensor.buffer(), dst_tensor.buffer(), start * data_size,
- start * data_size, (end - start) * data_size);
- start = end;
- }
- assert(tasks.size() >= 1);
- _tasks_map[src] = std::move(tasks);
- }
- else
- {
- auto loop_shape = src_tensor.getShape();
-
- auto copy_axis = loop_shape.rank() - 1;
- copy_axis = copy_axis < 0 ? 1 : copy_axis;
- const auto copy_len = loop_shape.dim(copy_axis) * data_size;
- loop_shape.dim(copy_axis) = 1;
-
- appendPermuteTasks(src, dst, loop_shape, copy_len);
- }
- }
- else
- {
- assert(src_tensor.num_dimensions() == 4 && (permute_type == PermuteType::NHWC_TO_NCHW ||
- permute_type == PermuteType::NCHW_TO_NHWC));
- const auto loop_shape = src_tensor.getShape();
- const auto copy_len = data_size;
-
- appendPermuteTasks(src, dst, loop_shape, copy_len);
- }
- });
- };
- src->access(fn);
- src_it++;
- dst_it++;
- src_offsets_it++;
- dst_offsets_it++;
- }
- }
-}
-
-void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
- const ir::Shape &loop_shape, size_t size)
-{
- size_t distributed_dim = 0;
- if (src_tensor->layout() == dst_tensor->layout())
- {
- for (size_t i = 1; i < src_tensor->num_dimensions() - 1; ++i)
- {
- distributed_dim =
- src_tensor->dimension(distributed_dim) < src_tensor->dimension(i) ? i : distributed_dim;
- }
- }
- const auto distributed_dim_val = src_tensor->dimension(distributed_dim);
- const int thread_count =
- _external_context->ruy_context()->max_num_threads() < static_cast<int>(distributed_dim_val)
- ? _external_context->ruy_context()->max_num_threads()
- : distributed_dim_val;
- // NOTE Do not remove this assertion. Exceeding max_num_threads would degrade performance by
- // forcing the context's thread pool to create new threads
- assert(thread_count <= _external_context->ruy_context()->max_num_threads());
-
- std::vector<PermuteWorkerTask> tasks;
- int start = 0;
- auto one_thread_loop_shape = loop_shape;
- for (auto i = 0; i < thread_count; ++i)
- {
- ir::Coordinates start_coords(one_thread_loop_shape.rank());
- start_coords.set(distributed_dim, start);
- int end = start + (distributed_dim_val - start) / (thread_count - i);
- one_thread_loop_shape.dim(distributed_dim) = end - start;
- tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
- start = end;
- }
- assert(tasks.size() >= 1);
- _tasks_map[src_tensor] = std::move(tasks);
-}
-
-void PermuteLayer::runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer)
-{
- assert(src->getShape().num_elements() * ir::sizeOfDataType(src->data_type()) <=
- src->total_size());
- std::vector<PermuteWorkerTask> &tasks = _tasks_map.at(src);
- for (size_t i = 0; i < tasks.size(); ++i)
- {
- tasks.at(i).setBuffers(src->buffer(), dst_buffer);
- }
- assert(tasks.size() >= 1);
- _external_context->ruy_context()->mutable_thread_pool()->Execute(tasks.size(), tasks.data());
-}
-
-void PermuteLayer::run()
-{
- assert(_src_tensors.size() == _dst_tensors.size());
- // PermuteLayer infers dynamic shapes by itself whenever run() is called, for the following
- // reasons:
- // 1. PermuteLayer has to access the dynamic tensor manager for input/output tensors of other
- //    backends
- // 2. Other controlflow operations (If/While) use this layer for copying tensors across
- //    subgraphs (possibly on other backends)
- // 3. The inference code is placed here to avoid duplicating it for the two reasons above
-
- // Infer and apply the destination shape when either tensor is dynamic
- for (size_t i = 0; i < _src_tensors.size(); ++i)
- {
- auto dst_tensor = _dst_tensors.at(i);
- auto src_tensor = _src_tensors.at(i);
- if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
- {
- // getting output shape
- auto src_shape = src_tensor->getShape();
-
- // set output shape and output buffer
- ir::Shape new_shape =
- exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout());
-
- try
- {
- if (!dst_tensor->applyShape(new_shape))
- throw std::runtime_error{
- "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
- assert(dst_tensor->buffer() != nullptr);
- }
- catch (const std::out_of_range &e)
- {
- std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support "
- "dynamic tensor"
- << '\n';
- throw;
- }
- }
- assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) ==
- dst_tensor->getShape());
- }
- assert(_src_tensors.size() == _dst_tensors.size());
- assert(_src_tensors.size() == _src_tensors_offsets.size());
- assert(_dst_tensors.size() == _dst_tensors_offsets.size());
- auto src_it = _src_tensors.begin();
- auto dst_it = _dst_tensors.begin();
- auto src_offsets_it = _src_tensors_offsets.begin();
- auto dst_offsets_it = _dst_tensors_offsets.begin();
- while (src_it != _src_tensors.end())
- {
- auto src = *src_it;
- auto dst = *dst_it;
- auto &src_offsets = *src_offsets_it;
- auto &dst_offsets = *dst_offsets_it;
-
- if (src->total_size() == 0)
- {
- assert(dst->total_size() == 0);
- }
- else
- {
- if (src != dst)
- {
- // Conditions to run the permutation with multithreading
- // 1. The tasks for multithreading were created
- // 2. More than one task exists
- // 3. Neither tensor is dynamic
- if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
- src->is_dynamic() || dst->is_dynamic())
- {
- permute(src, dst, src->num_dimensions(), src_offsets, dst_offsets);
- }
- // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
- else if (dst->needMemoryMap() && !dst->is_subtensor())
- {
- if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
- {
- // This is more effective than multi-threading
- src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
- }
- else
- {
- // TODO Optimize this block for the case where dst has a large padding size.
- _buffers_map[dst].reserve(dst->total_size());
- auto dst_buffer = _buffers_map[dst].data();
-
- src->access([&](backend::ITensor &) { runPermuteTasks(src, dst_buffer); });
- dst->enqueueWriteBuffer(dst_buffer, false);
- }
- }
- else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
- !dst->has_padding() && src->layout() == dst->layout())
- {
- // This is more effective than multi-threading
- assert(!dst->needMemoryMap());
- dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
- }
- else
- {
- auto fn = [&](backend::ITensor &) {
- dst->access([&](backend::ITensor &) { runPermuteTasks(src, dst->buffer()); });
- };
- src->access(fn);
- }
- }
- }
- src_it++;
- dst_it++;
- src_offsets_it++;
- dst_offsets_it++;
- }
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
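
The deleted optimize() and appendPermuteTasks() above both split the copy work into near-equal contiguous chunks, one per thread, using end = start + (remaining / threads_left). A small self-contained sketch of that partitioning scheme (hypothetical helper name, not part of the removed code):

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Split `total` units of work into at most `max_threads` contiguous ranges whose sizes
// differ by at most one: end = start + (total - start) / (threads left)
std::vector<std::pair<int64_t, int64_t>> splitEvenly(int64_t total, int max_threads)
{
  const int thread_count = max_threads < total ? max_threads : static_cast<int>(total);
  std::vector<std::pair<int64_t, int64_t>> ranges;
  int64_t start = 0;
  for (int i = 0; i < thread_count; ++i)
  {
    int64_t end = start + (total - start) / (thread_count - i);
    ranges.emplace_back(start, end);
    start = end;
  }
  return ranges;
}

int main()
{
  // 10 elements over 4 threads -> [0, 2) [2, 4) [4, 7) [7, 10)
  for (const auto &r : splitEvenly(10, 4))
    std::cout << "[" << r.first << ", " << r.second << ")\n";
}
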
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
-
-#include "exec/IPermuteFunction.h"
-#include "exec/IExecutor.h"
-#include "../ExternalContext.h"
-#include "ruy/thread_pool.h" // from @ruy
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-class PermuteLayer : public onert::exec::IPermuteFunction
-{
-public:
- PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors,
- const std::shared_ptr<ExternalContext> &external_context);
-
- void optimize() override;
-
- void run() override;
-
-private:
- std::shared_ptr<ExternalContext> _external_context;
-
-private:
- void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
- const ir::Shape &loop_shape, size_t size);
-
- void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);
-
- struct PermuteWorkerTask : ruy::Task
- {
- using Strides = ir::Coordinates;
-
- PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
- const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
- : _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
- _src_start_offset{src_tensor.calcOffset(start_coords)},
- _dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
- _loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
- _dst_layout{dst_tensor.layout()}, _is_permutation{true}
- {
- // Set strides
- setStrides(src_tensor, &_src_strides);
- setStrides(dst_tensor, &_dst_strides);
-
- _is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
- }
- // Constructor for a copy
- PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
- uint32_t dst_start_offset, size_t size)
- : _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
- _dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0}, _loop_shape{1},
- _size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
- {
- // DO NOTHING
- }
- void setBuffers(const uint8_t *src_buffer, uint8_t *dst_buffer)
- {
- _src_buffer = src_buffer;
- _dst_buffer = dst_buffer;
- }
- void Run() override
- {
- ShapeLoop(_loop_shape, [&](const onert::ir::Coordinates &coords) {
- size_t src_offset = _src_start_offset;
- size_t dst_offset = _dst_start_offset;
- assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
- ir::Coordinates dst_coords = coords;
- if (_is_permutation)
- {
- dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
- }
- for (auto i = 0; i < _loop_shape.rank(); ++i)
- {
- assert(coords[i] >= 0 && dst_coords[i] >= 0);
- src_offset += coords[i] * _src_strides[i];
- dst_offset += dst_coords[i] * _dst_strides[i];
- }
- memcpy(_dst_buffer + dst_offset, _src_buffer + src_offset, _size);
- });
- }
-
- private:
- void setStrides(const ITensor &tensor, Strides *strides)
- {
- const size_t rank = tensor.num_dimensions();
- for (size_t i = 0; i < rank; ++i)
- {
- ir::Coordinates no_step(rank), one_step(rank);
- one_step.set(i, 1);
- if (tensor.dimension(i) > 1)
- {
- strides->set(i, tensor.calcOffset(one_step) - tensor.calcOffset(no_step));
- }
- else
- {
- // If the dimension value is 0 or 1, the stride of that dimension is never used
- // Do not call calcOffset() with a coordinate value greater than the dimension value
- strides->set(i, 0);
- }
- assert((*strides)[i] >= 0);
- }
- }
-
- private:
- const uint8_t *_src_buffer;
- uint8_t *_dst_buffer;
- size_t _src_start_offset;
- size_t _dst_start_offset;
- Strides _src_strides;
- Strides _dst_strides;
- const ir::Shape _loop_shape;
- const size_t _size;
- const ir::Layout _src_layout;
- const ir::Layout _dst_layout;
- bool _is_permutation;
- };
- std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
-};
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
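
PermuteWorkerTask above derives per-dimension strides from calcOffset() differences and remaps coordinates between layouts before each memcpy. The sketch below shows the same idea for the padding-free NHWC-to-NCHW case with float data; the names and the fixed rank are illustrative, and the real task copies _size bytes per step and handles arbitrary strides:

#include <array>
#include <cstdint>

// Row-major strides (in elements) for a 4-D shape, analogous to what setStrides() derives
// from calcOffset() differences when there is no padding.
std::array<int64_t, 4> rowMajorStrides(const std::array<int64_t, 4> &dims)
{
  std::array<int64_t, 4> s{};
  s[3] = 1;
  for (int i = 2; i >= 0; --i)
    s[i] = s[i + 1] * dims[i + 1];
  return s;
}

// Copy an NHWC float buffer into an NCHW buffer element by element: remap the coordinate,
// then accumulate offsets through the strides, as PermuteWorkerTask::Run() does.
void nhwcToNchw(const float *src, float *dst, int64_t n, int64_t h, int64_t w, int64_t c)
{
  const auto src_strides = rowMajorStrides({n, h, w, c});
  const auto dst_strides = rowMajorStrides({n, c, h, w});
  for (int64_t in = 0; in < n; ++in)
    for (int64_t ih = 0; ih < h; ++ih)
      for (int64_t iw = 0; iw < w; ++iw)
        for (int64_t ic = 0; ic < c; ++ic)
        {
          const int64_t src_off = in * src_strides[0] + ih * src_strides[1] +
                                  iw * src_strides[2] + ic * src_strides[3];
          const int64_t dst_off = in * dst_strides[0] + ic * dst_strides[1] +
                                  ih * dst_strides[2] + iw * dst_strides[3];
          dst[dst_off] = src[src_off];
        }
}
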
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WhileLayer.h"
-
-#include <algorithm>
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
-#include "PermuteLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
- const std::vector<backend::IPortableTensor *> output_tensors,
- const ir::SubgraphIndex &cond_subg_index,
- const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map,
- cpu_common::DynamicMemoryManager *dyn_memory_manager,
- const std::shared_ptr<ExternalContext> &external_context)
- : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
- _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map},
- _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
-{
- // At this point, executor_map may not have executors of cond subg and body subg
-}
-
-void WhileLayer::run()
-{
- // Copy "_input_tensors" -> "cond subg inputs"
- // Run cond subg
- // Start a loop while the output of cond subg is true
- // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy
- // "body subg outputs" -> "body subg inputs" in subsequent iterations
- // // Run body subg
- // // Copy "body subg outputs" -> "cond subg inputs"
- // // Run cond subg
- // If the loop never runs, copy "_input_tensors" -> "_output_tensors"; otherwise copy
- // "cond subg inputs" -> "_output_tensors"
- auto cond_exec = _executor_map->at(_cond_subg_index).get();
- auto body_exec = _executor_map->at(_body_subg_index).get();
-
- // Need a temp tensor to hold the cond subgraph output
- assert(cond_exec->getOutputTensors().size() == 1);
- auto cond_output_tensor = [&]() {
- auto cond_output = cond_exec->getOutputTensors().at(0);
- auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(),
- _dyn_memory_manager);
- tensor->set_dynamic();
- tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
- return tensor;
- }();
-
- VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
- cond_exec->execute(_input_tensors, {cond_output_tensor.get()});
- VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
-
- auto getResultCond = [](backend::ITensor *tensor) -> bool {
- bool ret = false;
- tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
- return ret;
- };
-
- std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end());
- std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end());
- // Copying body inputs to outputs when the loop body is never executed
- if (!getResultCond(cond_output_tensor.get()))
- {
- PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context};
- copy_body_inputs_to_op_outputs.run();
- return;
- }
-
- // Need some temp tensors to hold the body subgraph output
- std::vector<std::unique_ptr<Tensor>> temp_outputs_o;
- std::vector<IPortableTensor *> temp_outputs;
- for (auto io_tensor : body_exec->getOutputTensors())
- {
- auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(),
- _dyn_memory_manager);
- tensor->set_dynamic();
- tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
- temp_outputs.push_back(tensor.get());
- temp_outputs_o.push_back(std::move(tensor));
- }
-
- std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end());
- PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context};
-
- const auto body_execute_with_op_inputs = [&]() {
- VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
- body_exec->execute(_input_tensors, temp_outputs);
- VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
- };
-
- const auto body_execute_with_body_outputs = [&]() {
- VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
- body_exec->execute(_output_tensors, temp_outputs);
- VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
- };
-
- std::function<void()> body_execute = body_execute_with_op_inputs;
- const auto cond_execute = [&]() {
- VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
- cond_exec->execute(_output_tensors, {cond_output_tensor.get()});
- VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
- };
-
- // Loop while Cond subgraph's output is true
- while (getResultCond(cond_output_tensor.get()))
- {
- body_execute();
- copy_body_outputs_to_op_outputs.run();
- cond_execute();
- body_execute = body_execute_with_body_outputs;
- }
-
- // Clean-up the temp tensors
- _dyn_memory_manager->deallocate(cond_output_tensor.get());
- for (auto tensor : temp_outputs)
- {
- _dyn_memory_manager->deallocate(tensor);
- }
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
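
The control flow of the deleted WhileLayer::run() reduces to: evaluate the cond subgraph on the inputs, copy the inputs straight to the outputs if it is false, and otherwise alternate body and cond until cond turns false. A compact sketch of that dataflow with plain value vectors standing in for executors and tensors (illustrative only, not the removed implementation):

#include <functional>
#include <iostream>
#include <vector>

using Tensors = std::vector<int>;

// cond and body stand in for the cond/body subgraph executors.
Tensors runWhile(const Tensors &inputs, const std::function<bool(const Tensors &)> &cond,
                 const std::function<Tensors(const Tensors &)> &body)
{
  if (!cond(inputs))
    return inputs;           // Loop body never runs: outputs are a copy of the inputs
  Tensors outputs = inputs;  // First iteration consumes the op inputs
  do
  {
    outputs = body(outputs); // "body subg outputs" feed the next iteration and the op outputs
  } while (cond(outputs));   // Re-evaluate the cond subgraph on the current outputs
  return outputs;
}

int main()
{
  // Count up to 5: body increments, cond checks the bound.
  auto out = runWhile({0}, [](const Tensors &t) { return t[0] < 5; },
                      [](const Tensors &t) { return Tensors{t[0] + 1}; });
  std::cout << out[0] << std::endl; // 5
}
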
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include <exec/IExecutor.h>
-#include <exec/IFunction.h>
-#include <ir/OperandIndexSequence.h>
-#include <ir/Graph.h>
-#include "../ExternalContext.h"
-
-#include "backend/cpu_common/MemoryManager.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-class WhileLayer : public ::onert::exec::IFunction
-{
-public:
- WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
- const std::vector<backend::IPortableTensor *> output_tensors,
- const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::ExecutorMap *executor_map, cpu_common::DynamicMemoryManager *dyn_memory_manager,
- const std::shared_ptr<ExternalContext> &external_context);
-
-public:
- void run() override;
-
-private:
- const ir::SubgraphIndex _cond_subg_index;
- const ir::SubgraphIndex _body_subg_index;
- const std::vector<backend::IPortableTensor *> _input_tensors;
- const std::vector<backend::IPortableTensor *> _output_tensors;
- exec::ExecutorMap *_executor_map;
- cpu_common::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
- const std::shared_ptr<ExternalContext> _external_context;
-};
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/Allocator.h"
-
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-Allocator::Allocator(uint32_t capacity)
-{
- _base = std::make_unique<uint8_t[]>(capacity);
-
- VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl;
- VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/BackendContextHelpers.h"
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/ConstantInitializer.h"
-#include "backend/cpu_common/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
-{
- // DO NOTHING
-}
-
-void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- registerExternalInitializer(index, obj);
-}
-
-void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // Only for CONSTANT operands
- // TODO Add a check for whether the tensor has been allocated
- if (!obj.isConstant())
- return;
-
- _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) {
- auto data = model_obj.shareData();
- assert(data && data->base());
- ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor);
- tensor.setData(data);
- };
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/ConstantInitializerBase.h"
-
-#include <Half.h>
-
-using float16 = Half;
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-void ConstantInitializerBase::registerCopyInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // Only for CONSTANT operands
- // TODO Add a check for whether the tensor has been allocated
- if (!obj.isConstant())
- return;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = copyInit<float>;
- break;
- case DataType::INT32:
- _init_map[index] = copyInit<int32_t>;
- break;
- case DataType::UINT32:
- _init_map[index] = copyInit<uint32_t>;
- break;
- case DataType::BOOL8:
- case DataType::QUANT_UINT8_ASYMM:
- _init_map[index] = copyInit<uint8_t>;
- break;
- case DataType::QUANT_INT8_SYMM:
- case DataType::QUANT_INT8_ASYMM:
- _init_map[index] = copyInit<int8_t>;
- break;
- case DataType::FLOAT16:
- _init_map[index] = copyInit<float16>;
- break;
- case DataType::INT64:
- _init_map[index] = copyInit<int64_t>;
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
-
-void ConstantInitializerBase::registerPermuteInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // Only for CONSTANT operands
- // TODO Add a check for whether the tensor has been allocated
- if (!obj.isConstant())
- return;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
- using namespace std::placeholders;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout);
- break;
- case DataType::INT32:
- _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout);
- break;
- case DataType::UINT32:
- _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_layout);
- break;
- case DataType::BOOL8:
- case DataType::QUANT_UINT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_layout);
- break;
- case DataType::QUANT_INT8_SYMM:
- case DataType::QUANT_INT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_layout);
- break;
- case DataType::FLOAT16:
- _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_layout);
- break;
- case DataType::INT64:
- _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_layout);
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/DynamicTensorManager.h"
-
-#include "util/logging.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> ®)
- : _dynamic_mem_mgr{new DynamicMemoryManager()}, _tensors{reg}
-{
- // DO NOTHING
-}
-
-void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout)
-{
- assert(_tensors->getNativeTensor(ind) == nullptr);
- auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr.get());
- _tensors->setNativeTensor(ind, std::move(tensor));
-}
-
-void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, backend::ITensor *tensor)
-{
- _dealloc_tensor_map[op_ind].emplace(tensor);
-}
-
-void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
-{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find == _dealloc_tensor_map.end())
- return;
-
- auto &input_set = find->second;
- for (auto *tensor : input_set)
- {
- if (!tensor->is_dynamic())
- continue;
-
- _dynamic_mem_mgr->deallocate(tensor);
-
- auto *cpu_tensor = nnfw::misc::polymorphic_downcast<cpu_common::Tensor *>(tensor);
- cpu_tensor->resetBuffer();
-
- VERBOSE(DynamicTensorManager) << "Deallocating tensor " << (void *)cpu_tensor
- << " (input of op_ind: " << op_ind.value() << ")" << std::endl;
- }
-}
-
-const ITensor *DynamicTensorManager::getRawITensor(ir::OperandIndex ind)
-{
- auto ptr = _tensors->getITensor(ind);
- assert(ptr);
- return ptr;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <backend/cpu_common/MemoryManager.h>
-
-#include <cassert>
-
-#include "MemoryPlannerFactory.h"
-#include "util/ConfigSource.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
-{
- // DO NOTHING
-}
-
-MemoryManager::MemoryManager(const std::string planner_id)
- : _mem_planner{createMemoryPlanner(planner_id)}
-{
- // DO NOTHING
-}
-
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner()
-{
- auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
-}
-
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
-{
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
-}
-
-void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- _mem_planner->claim(ind, size);
-}
-
-void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); }
-
-void MemoryManager::allocate(void)
-{
- _mem_alloc = std::make_shared<cpu_common::Allocator>(_mem_planner->capacity());
- assert(_mem_alloc->base());
-}
-
-uint8_t *MemoryManager::getBuffer(const ir::OperandIndex &ind) const
-{
- assert(_mem_planner->memory_plans().find(ind) != _mem_planner->memory_plans().end());
- const auto &mem_blk = _mem_planner->memory_plans().at(ind);
- return _mem_alloc->base() + mem_blk.offset;
-}
-
-std::shared_ptr<cpu_common::Allocator> DynamicMemoryManager::allocate(const ITensor *tensor,
- uint32_t capacity)
-{
- auto find = _mem_alloc_map.find(tensor);
- if (find != _mem_alloc_map.end())
- throw std::runtime_error("Cannot allocate memory for a tensor. It was already allocated.");
-
- _mem_alloc_map[tensor] = std::make_shared<cpu_common::Allocator>(capacity);
- return _mem_alloc_map[tensor];
-}
-
-void DynamicMemoryManager::deallocate(const ITensor *tensor)
-{
- auto find = _mem_alloc_map.find(tensor);
- if (find == _mem_alloc_map.end())
- throw std::runtime_error("Cannot find Allocator for the requested index");
-
- find->second->release(); // explicitly erase memory
- _mem_alloc_map.erase(find); // remove tensor and alloc
-}
-
-void DynamicMemoryManager::deallocate(void)
-{
- for (auto &mem_alloc : _mem_alloc_map)
- {
- // Release memory buffer of mem_alloc
- mem_alloc.second->release();
- }
-
- _mem_alloc_map.clear();
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MemoryPlanner.h"
-#include "util/logging.h"
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- Block blk{_capacity, size};
- _mem_plans[ind] = blk;
- _capacity += size;
-
- VERBOSE(BP_PLANNER) << "CLAIM(#" << ind.value() << "): " << blk.offset << ", " << blk.size
- << std::endl;
-}
-
-void BumpPlanner::release(const ir::OperandIndex &ind)
-{
- VERBOSE(BP_PLANNER) << "RELEASE(#" << ind.value() << "): "
- << "NOTHING does" << std::endl;
-}
-
-// There are some assumptions for claiming memory (== making a reservation for memory).
-// 1. About _claim_table (std::map).
-//    - std::map is used so that entries are always sorted by key (base_offset), with the
-//      value being the OperandIndex.
-//    - claim() inserts a key/value pair into _claim_table and release() removes it.
-//    - _claim_table reflects the memory status at a certain point in time. Therefore,
-//      - If _claim_table holds an entry at an offset with a certain size at that point, the
-//        place at the offset has already been claimed (== it cannot be claimed now; a new
-//        place has to be found).
-//      - If _claim_table has no entry covering an offset at that point, the place at the
-//        offset can be claimed.
-// 2. In the loop over _claim_table, the current claimed_base_offset is always bigger than the
-//    previous one.
-void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- // Find the right position for claiming
- uint32_t next_offset = 0;
- for (auto &mem_claim : _claim_table)
- {
- auto claimed_base_offset = mem_claim.first;
- auto claimed_size = _mem_plans[mem_claim.second].size;
- if (next_offset + size <= claimed_base_offset)
- {
- break;
- }
- else
- {
- next_offset = claimed_base_offset + claimed_size;
- }
- }
-
- // Now next_offset is set to the proper offset
- _claim_table[next_offset] = ind;
- _mem_plans[ind] = {next_offset, size};
-
- VERBOSE(FF_PLANNER) << "claim(#" << ind.value() << "): [+" << next_offset << ", " << size << "sz]"
- << std::endl;
-
- if (_capacity < next_offset + size)
- {
- _capacity = next_offset + size;
- }
-}
-
-void FirstFitPlanner::release(const ir::OperandIndex &ind)
-{
- for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
- {
- if (it->second == ind)
- {
- uint32_t offset = it->first;
- uint32_t index = ind.value();
- uint32_t size = _mem_plans[ind].size;
-
- _claim_table.erase(it);
-
- VERBOSE(FF_PLANNER) << "release(#" << index << "): [+" << offset << ", " << size << "sz]"
- << std::endl;
- return;
- }
- }
- assert(!"Cannot release for given index. It has been not claimed or released already.");
-}
-
-WICPlanner::WICPlanner()
- : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
- _operands()
-{
- // DO NOTHING
-}
-
-void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- _operands.emplace(size, ind);
- _interference_graph[ind].insert(_interference_graph[ind].end(), _live_operands.cbegin(),
- _live_operands.cend());
- for (const auto &live_operand : _live_operands)
- {
- _interference_graph[live_operand].emplace_back(ind);
- }
- _live_operands.emplace(ind);
-
- VERBOSE(WIC_PLANNER) << "claim(#" << ind.value() << "): [" << size << "sz]" << std::endl;
-}
-
-void WICPlanner::release(const ir::OperandIndex &ind)
-{
- _live_operands.erase(ind);
- VERBOSE(WIC_PLANNER) << "release(#" << ind.value() << ")" << std::endl;
-}
-
-/*
- * Build memory plans using the liveness and size of operands
- * 1. Build an interference graph at claim()
- *   - Two operands interfere if their live ranges overlap
- * 2. Sort operands in descending order of size
- *   - Use std::multimap to sort operands
- * 3. Allocate a memory block for each sorted operand
- *   - Find a free memory block that does not overlap with interfering operands
- */
-void WICPlanner::buildMemoryPlans()
-{
- for (const auto &operand : _operands)
- {
- uint32_t size = operand.first;
- const ir::OperandIndex &ind = operand.second;
- VERBOSE(WIC_PLANNER) << "build_plan(#" << ind.value() << "): [" << size << "sz]" << std::endl;
-
- uint32_t next_offset = 0;
- if (_interference_graph.count(ind))
- {
- // Find interfered memory plans and sort them by offset
- std::multimap<uint32_t, uint32_t> interfered_plans;
- for (const auto &interference : _interference_graph[ind])
- {
- if (_mem_plans.count(interference))
- interfered_plans.emplace(_mem_plans[interference].offset, _mem_plans[interference].size);
- }
-
- // Find free memory block in first-fit manner
- for (const auto &interfered_plan : interfered_plans)
- {
- auto claimed_base_offset = interfered_plan.first;
- auto claimed_size = interfered_plan.second;
- VERBOSE(WIC_PLANNER) << "interfere : [+" << claimed_base_offset << ", " << claimed_size
- << "sz]" << std::endl;
- if (next_offset + size <= claimed_base_offset)
- {
- break;
- }
- else if (next_offset < claimed_base_offset + claimed_size)
- {
- next_offset = claimed_base_offset + claimed_size;
- }
- }
- }
- else
- {
- VERBOSE(WIC_PLANNER) << "No interference" << std::endl;
- }
-
- _mem_plans[ind] = {next_offset, size};
- VERBOSE(WIC_PLANNER) << "alloc(#" << ind.value() << "): [+" << next_offset << ", " << size
- << "sz]" << std::endl;
-
- if (_capacity < next_offset + size)
- {
- _capacity = next_offset + size;
- }
- }
- _initialized = true;
- _interference_graph.clear();
- _operands.clear();
-}
-
-WICPlanner::MemoryPlans &WICPlanner::memory_plans()
-{
- if (!_initialized)
- buildMemoryPlans();
- return _mem_plans;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
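
FirstFitPlanner::claim() above scans the offset-sorted _claim_table and takes the first gap large enough for the requested size. A minimal standalone sketch of that first-fit scan, with the table mapping base_offset directly to block size instead of to an OperandIndex (hypothetical names, not part of the removed code); the printed offsets illustrate how a freed gap is reused:

#include <cstdint>
#include <iostream>
#include <map>

// Scan claimed blocks in offset order and take the first gap that can hold `size`.
uint32_t firstFitClaim(std::map<uint32_t, uint32_t> &claimed, uint32_t size)
{
  uint32_t next_offset = 0;
  for (const auto &blk : claimed)
  {
    if (next_offset + size <= blk.first)
      break;                              // The gap before this block is big enough
    next_offset = blk.first + blk.second; // Otherwise jump past the claimed block
  }
  claimed[next_offset] = size;
  return next_offset;
}

int main()
{
  std::map<uint32_t, uint32_t> claimed;
  std::cout << firstFitClaim(claimed, 10) << "\n"; // 0
  std::cout << firstFitClaim(claimed, 20) << "\n"; // 10
  claimed.erase(0);                                // release the first block
  std::cout << firstFitClaim(claimed, 5) << "\n";  // 0  (fits into the freed gap)
  std::cout << firstFitClaim(claimed, 8) << "\n";  // 30 (no gap large enough before the end)
}
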
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file       MemoryPlanner.h
- * @brief      This file contains Memory Planning related classes
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
-#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
-
-#include <map>
-#include <vector>
-#include <unordered_set>
-#include <memory>
-
-#include "backend/cpu_common/Allocator.h"
-#include "backend/cpu_common/IMemoryPlanner.h"
-#include "ir/OperandIndexMap.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-/**
- * @brief Class to plan memory by bump way
- */
-class BumpPlanner : public IMemoryPlanner
-{
-public:
- /**
- * @brief Claim memory for operand by bump way
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for operand by bump way
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override { return _capacity; }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override { return _mem_plans; }
-
-private:
- uint32_t _capacity = 0;
- MemoryPlans _mem_plans;
-};
-
-/**
- * @brief Class to plan memory by firstfit way
- */
-class FirstFitPlanner : public IMemoryPlanner
-{
-public:
- /**
- * @brief Claim memory for operand by firstfit way
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for operand by firstfit way
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override { return _capacity; }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override { return _mem_plans; }
-
-private:
- uint32_t _capacity = 0;
- MemoryPlans _mem_plans;
- // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
- std::map<uint32_t, ir::OperandIndex> _claim_table;
-};
-
-/**
- * @brief Class to plan memory by Weighted Interval Color algorithm
- */
-class WICPlanner : public IMemoryPlanner
-{
-public:
- WICPlanner();
-
- /**
- * @brief Claim memory for operand by WIC algorithm
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for operand by WIC algorithm
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override
- {
- if (!_initialized)
- buildMemoryPlans();
- return _capacity;
- }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override;
-
-private:
- void buildMemoryPlans();
-
- bool _initialized;
- uint32_t _capacity;
- MemoryPlans _mem_plans;
- std::unordered_set<ir::OperandIndex> _live_operands;
- ir::OperandIndexMap<std::vector<ir::OperandIndex>> _interference_graph;
- // Sort operands by descending order of size
- std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _operands;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "MemoryPlanner.h"
-#include "ir/Index.h"
-
-TEST(Allocator, allocate_test)
-{
- ::onert::backend::cpu_common::Allocator allocator(1024);
- ASSERT_NE(allocator.base(), nullptr);
-}
-
-TEST(BumpPlanner, claim_test)
-{
- ::onert::backend::cpu_common::BumpPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
- onert::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- claim(0, 10, 0);
- claim(1, 20, 10);
- claim(2, 30, 30);
-}
-
-TEST(FirstFitPlanner, claim_release_test)
-{
- ::onert::backend::cpu_common::FirstFitPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
- onert::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- auto release = [&planner](uint32_t index) {
- onert::ir::OperandIndex mem_idx(index);
- planner.release(mem_idx);
- };
-
- // 0 CLAIM - 10
- claim(0, 10, 0);
-
- // 1 CLAIM - 20
- claim(1, 20, 10);
-
- // 2 CLAIM - 30
- claim(2, 30, 30);
-
- // 0 RELEASE - 10
- release(0);
-
- // 3 CLAIM - 20
- claim(3, 20, 60);
-
- // 4 CLAIM - 5
- claim(4, 5, 0);
-
- // 5 CLAIM - 10
- claim(5, 10, 80);
-
- // 6 CLAIM - 5
- claim(6, 5, 5);
-
- // 2 RELEASE - 30
- release(2);
-
- // 7 CLAIM - 35
- claim(7, 35, 90);
-
- // 8 CLAIM - 10
- claim(8, 10, 30);
-
- // 4 RELEASE - 5
- release(4);
-
- // 9 CLAIM - 10
- claim(9, 10, 40);
-
- // 10 CLAIM - 10
- claim(10, 10, 50);
-
- // 6 RELEASE
- release(6);
-
- // 1 RELEASE
- release(1);
-
- // 8 RELEASE
- release(8);
-
- // 9 RELEASE
- release(9);
-
- // 10 RELEASE
- release(10);
-
- // 3 RELEASE
- release(3);
-
- // 5 RELEASE
- release(5);
-
- // 7 RELEASE
- release(7);
-}
-
-TEST(WICPlanner, claim_release_test)
-{
- ::onert::backend::cpu_common::WICPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size) {
- onert::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- };
-
- auto release = [&planner](uint32_t index) {
- onert::ir::OperandIndex mem_idx(index);
- planner.release(mem_idx);
- };
-
- auto verify = [&planner](uint32_t index, uint32_t size, uint32_t expected_offset) {
- onert::ir::OperandIndex mem_idx(index);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- auto capacity = [&planner](uint32_t expected_capacity) {
- auto actual_capacity = planner.capacity();
- ASSERT_EQ(actual_capacity, expected_capacity);
- };
-
- claim(0, 20);
- claim(1, 5);
- release(0);
- claim(2, 10);
- release(1);
- claim(3, 10);
- release(2);
- claim(4, 10);
- release(3);
- claim(5, 20);
- release(4);
- claim(6, 20);
- release(5);
- release(7);
-
- // VERIFY 0 - 0
- verify(0, 20, 0);
-
- // VERIFY 1 - 20
- verify(1, 5, 20);
-
- // VERIFY 2 - 0
- verify(2, 10, 0);
-
- // VERIFY 3 - 10
- verify(3, 10, 10);
-
- // VERIFY 4 - 20
- verify(4, 10, 20);
-
- // VERIFY 5 - 0
- verify(5, 20, 0);
-
- // VERIFY 6 - 20
- verify(6, 20, 20);
-
- // CAPACITY - 40
- capacity(40);
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MemoryPlannerFactory.h"
-
-#include "MemoryPlanner.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-MemoryPlannerFactory &MemoryPlannerFactory::get()
-{
- static MemoryPlannerFactory instance;
- return instance;
-}
-
-IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key)
-{
- if (key == "FirstFit")
- {
- return new FirstFitPlanner;
- }
- else if (key == "Bump")
- {
- return new BumpPlanner;
- }
- else if (key == "WIC")
- {
- return new WICPlanner;
- }
- return new FirstFitPlanner; // Default Planner
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
-#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
-
-#include "backend/cpu_common/IMemoryPlanner.h"
-
-#include <string>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class MemoryPlannerFactory
-{
-public:
- static MemoryPlannerFactory &get();
-
-private:
- MemoryPlannerFactory() = default;
-
-public:
- IMemoryPlanner *create(const std::string &key);
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/StaticTensorManager.h"
-
-#include "backend/cpu_common/DynamicTensorManager.h"
-#include "backend/cpu_common/Tensor.h"
-#include <util/logging.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®,
- DynamicTensorManager *dynamic_tensor_manager)
- : _nonconst_mgr{new MemoryManager()}, _tensors{reg},
- _dynamic_tensor_manager{dynamic_tensor_manager}
-{
- // DO NOTHING
-}
-
-void StaticTensorManager::allocateNonconsts(void)
-{
- _nonconst_mgr->allocate();
-
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second.get();
- if (!_as_constants[ind] && !tensor->is_dynamic())
- {
- auto *buffer = _nonconst_mgr->getBuffer(ind);
- tensor->setBuffer(buffer);
-
- VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
- }
- }
-}
-
-void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
-
-void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
- bool as_const)
-{
- assert(!_tensors->getNativeTensor(ind));
- if (as_const)
- {
- auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- else
- {
- auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
- _dynamic_tensor_manager->dynamic_mem_mgr().get());
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- _as_constants[ind] = as_const;
-}
-
-void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- assert(_tensors->getNativeTensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getNativeTensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->claimPlan(ind, size);
-}
-
-void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
-{
- assert(_tensors->getNativeTensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getNativeTensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->releasePlan(ind);
-}
-
-void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (const auto &it : _tensors->native_tensors())
- fn(it.first);
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/Tensor.h"
-
-#include "ir/DataType.h"
-#include "backend/cpu_common/MemoryManager.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-Tensor::~Tensor() {}
-
-size_t Tensor::calcOffset(const ir::Coordinates &coords) const
-{
- size_t rank = num_dimensions();
- rank = rank == 0 ? 1 : rank;
- size_t offset = 0;
- for (size_t i = 0; i < rank; ++i)
- {
- offset = offset * dimension(i) + coords[i];
- }
- offset *= sizeOfDataType(data_type());
- return offset;
-}
-
-void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); }
-
-bool Tensor::applyShape(const ir::Shape &new_shape)
-{
- bool previously_dynamic = is_dynamic();
-
- auto allocTensorMem = [&](bool overwrite = false) {
- auto capacity = total_size();
- auto alloc = _dynamic_mem_mgr->allocate(this, capacity);
-
- if (overwrite)
- overwriteBuffer(alloc);
- else
- setBuffer(alloc);
- };
-
- if (!previously_dynamic)
- {
- // TODO deallocate tensor->buffer()
- // issue is that staticTensorManager might have allocate this memory
- setShape(new_shape);
- set_dynamic();
- allocTensorMem(true);
- }
- else if (buffer() == nullptr)
- {
- setShape(new_shape);
- set_dynamic();
- allocTensorMem();
- }
- // when buffer was already allocated and new_shape requires different size
- else
- {
- auto previous_size = total_size();
- auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
- if (previous_size != new_size)
- {
- _dynamic_mem_mgr->deallocate(this);
-
- setShape(new_shape);
- set_dynamic();
- allocTensorMem(true);
- }
- else
- { // when buffer with same size was already allocated, shape could differ
- setShape(new_shape);
- }
- }
- return true;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-// ExternalTensor
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-// `dynamic_cast` not working across library boundaries on NDK
-// With this as a key function, `dynamic_cast` works across dl
-ExternalTensor::~ExternalTensor() {}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
#include <dlfcn.h>
#include "backend/Backend.h"
-#include "backend/controlflow/Backend.h"
-#include "backend/controlflow/Config.h"
+#include "backend/builtin/Backend.h"
+#include "backend/builtin/Config.h"
#include "backend/IConfig.h"
#include "util/logging.h"
#include "util/ConfigSource.h"
static const char *SHARED_LIB_EXT =
#if defined(__APPLE__) && defined(__MACH__)
- ".dylib";
+ ".dylib";
#else
- ".so";
+ ".so";
#endif
namespace onert
return object;
}
-BackendManager::BackendManager() { loadControlflowBackend(); }
+BackendManager::BackendManager() { loadBuiltinBackend(); }
-void BackendManager::loadControlflowBackend()
+void BackendManager::loadBuiltinBackend()
{
- auto backend_object = std::unique_ptr<backend::controlflow::Backend, backend_destroy_t>(
- new backend::controlflow::Backend, [](backend::Backend *backend) { delete backend; });
+ auto backend_object = std::unique_ptr<backend::builtin::Backend, backend_destroy_t>(
+ new backend::builtin::Backend, [](backend::Backend *backend) { delete backend; });
bool initialized = backend_object->config()->initialize(); // Call initialize here?
if (!initialized)
{
- throw std::runtime_error(backend::controlflow::Config::ID + " backend initialization failed");
+ throw std::runtime_error(backend::builtin::Config::ID + " backend initialization failed");
}
- _controlflow = backend_object.get(); // Save the controlflow backend implementation pointer
- assert(_controlflow);
+ _builtin = backend_object.get(); // Save the builtin backend implementation pointer
+ assert(_builtin);
_gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
}
}
auto backend_object =
- std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
+ std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
bool initialized = backend_object->config()->initialize(); // Call initialize here?
if (!initialized)
{
}
// Save backend handle (avoid warning by handle lost without dlclose())
-
- // NOTE This is a workaround for clang-format3.9 (seems like it does not understand
- // "by-copy capture with an initializer"
- // clang-format off
auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{
- handle, [id = backend, filename = backend_so](void *h) {
- if (dlclose(h) == 0)
- {
- VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n";
- }
- else
- {
- VERBOSE(BackendManager)
- << "Failed to unload backend '" << id << "'- " << dlerror() << "\n";
- }
- }};
-// clang-format on
-_handle_map.emplace(backend, std::move(u_handle));
+ handle, [id = backend, filename = backend_so](void *h) {
+ if (dlclose(h) == 0)
+ {
+ VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n";
+ }
+ else
+ {
+ VERBOSE(BackendManager) << "Failed to unload backend '" << id << "'- " << dlerror() << "\n";
+ }
+ }};
+ _handle_map.emplace(backend, std::move(u_handle));
}
backend::Backend *BackendManager::get(const std::string &key)
return nullptr;
}
-const backend::controlflow::Backend *BackendManager::getControlflow() const { return _controlflow; }
+const backend::builtin::Backend *BackendManager::getBuiltin() const { return _builtin; }
} // namespace compiler
} // namespace onert
#include "compiler/Compiler.h"
-#include "ParamChecker.h"
#include "ExecutorFactory.h"
#include "ShapeValidator.h"
-#include "Fp32ToFp16Converter.h"
-#include <backend/controlflow/Config.h>
+#include <backend/builtin/Config.h>
#include "compiler/BackendManager.h"
#include "compiler/IScheduler.h"
#include "compiler/ManualScheduler.h"
#include "compiler/HEScheduler.h"
#include "compiler/StaticShapeInferer.h"
+#include "compiler/OperationLowerInfo.h"
#include "compiler/pass/ConstantOutputPass.h"
#include "compiler/pass/OddOutputPass.h"
#include "compiler/pass/PassRunner.h"
+#include "compiler/pass/UnusedOperandEliminationPass.h"
#include "exec/ExecTime.h"
-#include "ir/operation/LowerInfo.h"
#include "ir/verifier/Verifier.h"
#include "dumper/dot/DotDumper.h"
#include "compiler/Linear.h"
options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
- options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE);
options.executor = util::getConfigString(util::config::EXECUTOR);
options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
options.fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
-#ifdef RUY_PROFILER
- options.op_seq_max_node = 1;
-#endif
{
// Backend for all
auto key = static_cast<uint32_t>(std::stoi(key_str));
subgs.at(ir::SubgraphIndex{0})
- ->operations()
- .at(ir::OperationIndex{key}); // Check if exist, or this wil throw
+ ->operations()
+ .at(ir::OperationIndex{key}); // Check if it exists, or this will throw
ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
}
}
}
Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx)
- : _subgraphs{subgs}, _state{State::CREATED}
+ : _subgraphs{subgs}, _state{State::CREATED}
{
// Set default values for CompilerOptions
// All these default values should not be fetched from Env, when we stop supporting Android NN
{
// Set control flow backend for control flow operators
{
- auto &cfid = backend::controlflow::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = cfid;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = cfid;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = cfid;
+ auto &builtin_id = backend::builtin::Config::ID;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
}
// FIXME This is a workaround for bcq operations, should remove it
}
{
- VERBOSE(Compiler) << std::boolalpha;
- VERBOSE(Compiler) << "==== Compiler Options ====" << std::endl;
+ VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
VERBOSE(Compiler) << "backend_list : "
<< nnfw::misc::join(_options.backend_list.begin(),
_options.backend_list.end(), "/")
<< std::endl;
VERBOSE(Compiler) << "trace_filepath : " << _options.trace_filepath << std::endl;
VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl;
- VERBOSE(Compiler) << "op_seq_max_node : " << _options.op_seq_max_node << std::endl;
VERBOSE(Compiler) << "executor : " << _options.executor << std::endl;
VERBOSE(Compiler) << "manual backend_for_all : "
<< _options.manual_scheduler_options.backend_for_all << std::endl;
VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl;
VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl;
VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl;
- VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl;
- VERBOSE(Compiler) << std::noboolalpha;
+ VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl
+ << std::noboolalpha;
}
_subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
// Mandatory passes
pass::PassRunner{}
- .append(std::make_unique<pass::ConstantOutputPass>(subg))
- .append(std::make_unique<pass::OddOutputPass>(subg))
- .run();
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
});
/***************************************************
// Compilable check
// TODO: Support hybrid execution -
// execution between interpreter and compiled executor (including control flow)
- if (!checkCompilable())
+ if (_options.disable_compile)
{
_subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
// Lower: Assign backend
lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
- // Check backend(s) for subgraph support FP16
- bool backends_support_fp16 = true;
- auto &contexts = (*lowered_subgs[index]).backend_contexts();
- for (auto it = contexts.begin(); it != contexts.end(); it++)
- {
- // Controlflow backend is not for actual computaion of operations so it is an exception
- if (it->first->config()->id() != backend::controlflow::Config::ID)
- backends_support_fp16 &= it->first->config()->supportFP16();
- }
-
- if (_options.fp16_enable && backends_support_fp16)
- {
- // NOTE: the only acl_cl backend enables fp16 mode
- Fp32ToFp16Converter(*lowered_subgs[index]).run();
- }
-
subg.setSubgraphs(nullptr);
});
{
const auto primary_subg_idx = ir::SubgraphIndex{0};
StaticShapeInferer inferer(primary_subg_idx, lowered_subgs);
- lowered_subgs.at(primary_subg_idx)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- auto has_dynamic_tensor = inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
+ auto &lowered_subg = lowered_subgs.at(primary_subg_idx);
+ auto ordered_ops = lowered_subg->graph().topolSortOperations();
+ for (auto op_ind : ordered_ops)
+ {
+ const auto &op = lowered_subg->graph().operations().at(op_ind);
+ bool has_dynamic_tensor = inferer.infer(op);
+ lowered_subg->setHasDynamicTensor(op_ind, has_dynamic_tensor);
+ }
inferer.dump();
}
ir::OperationDumper dumper("Executor generation of Subgraph " +
std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
+ [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)};
+ ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)};
executor->setIndexedRanks(indexed_ranks);
executors->insert(std::make_pair(subg_index, std::move(executor)));
}
return executors;
}
-bool Compiler::checkCompilable()
-{
- // Disable compile phase
- // When ready to use interpreter backend, remove this config and use backend setting
- if (_options.disable_compile)
- {
- return false;
- }
-
- // TODO check unspecified operand shape
-
- // Check compilable parameter
- for (uint32_t i = 0; i < _subgraphs->count(); ++i)
- {
- auto graph = _subgraphs->at(ir::SubgraphIndex{i});
- ParamChecker paramChecker{graph};
- paramChecker();
- if (paramChecker.haveNoneConstParam())
- {
- return false;
- }
- }
-
- return true;
-}
-
} // namespace compiler
} // namespace onert
#include <deque>
#include <functional>
+#include "ir/OperationCloner.h"
#include "exec/ExecutionObservers.h"
#include "exec/LinearExecutor.h"
#include "exec/DataflowExecutor.h"
#include "compiler/ExecutionBuilder.h"
#include "exec/ExecTime.h"
#include "compiler/Linear.h"
+#include "compiler/BackendManager.h"
#include "backend/IPortableTensor.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/KernelGenerator.h"
-#include "backend/controlflow/UserTensor.h"
-#include "backend/controlflow/TensorBuilder.h"
+#include "backend/builtin/Config.h"
+#include "backend/builtin/KernelGenerator.h"
+#include "backend/builtin/UserTensor.h"
+#include "backend/builtin/TensorBuilder.h"
#include "util/TracingCtx.h"
+#include "dumper/text/GraphDumper.h"
#include <memory>
public:
virtual ~SyncFunction() = default;
SyncFunction(std::unique_ptr<exec::IFunction> fn, const std::shared_ptr<backend::IConfig> config)
- : _fn{std::move(fn)}, _config{config}
+ : _fn{std::move(fn)}, _config{config}
{
assert(_fn);
assert(_config);
std::shared_ptr<backend::IConfig> _config;
};
+using DeallocList = std::vector<backend::ITensor *>;
+// Deallocates the listed dynamic tensors right after an operation finishes; used by the Linear Executor
+class DeallocFunction final : public exec::IFunction
+{
+public:
+ DeallocFunction(const DeallocList &tensors) : _dealloc_list{tensors} {}
+
+ void run() override
+ {
+ for (auto tensor : _dealloc_list)
+ {
+ if (!tensor->is_dynamic())
+ continue;
+ tensor->deallocBuffer();
+ }
+ }
+
+private:
+ DeallocList _dealloc_list;
+};
+
void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts,
const ir::OperandIndexSequence &indices)
{
- // TODO Store controlflow backend in BackendContext
- std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
- for (const auto &e : lowered_graph.backend_contexts())
+ // TODO Store builtin backend in BackendContext
+ std::shared_ptr<backend::builtin::TensorRegistry> builtin_tensor_reg;
+ for (const auto &e : backend_contexts)
{
auto backend = e.first;
auto &context = e.second;
- if (backend->config()->id() == backend::controlflow::Config::ID)
+ if (backend->config()->id() == backend::builtin::Config::ID)
{
- cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(context->tensor_registry);
}
}
- assert(cf_tensor_reg);
+ assert(builtin_tensor_reg);
for (auto ind : indices)
{
const auto &operand = lowered_graph.graph().operands().at(ind);
- auto tensor = std::make_unique<backend::controlflow::IOTensor>(
- operand.info(),
- ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
- );
+ auto tensor = std::make_unique<backend::builtin::IOTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */
+ );
- // Add tensor to controlflow TensorRegistry.
- cf_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
+ // Add tensor to builtin TensorRegistry.
+ builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
}
}
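+// Split the lowered graph into one partial graph per backend (based on each operand's and
+// operation's lower info) and create a BackendContext for every backend from its partial graph.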
+backend::BackendContexts createBackendContexts(compiler::LoweredGraph &lgraph, bool linear_executor)
+{
+ backend::BackendContexts contexts;
+ auto &backend_manager = compiler::BackendManager::get();
+
+ std::unordered_map<const backend::Backend *, backend::ContextData> context_data_map;
+
+ // Generate partial graphs for each backend
+ for (auto backend : backend_manager.getAll())
+ {
+ auto &data = context_data_map[backend];
+ auto graph = std::make_unique<ir::Graph>();
+ graph->setLayout(lgraph.graph().layout());
+ data.graph = std::move(graph);
+ }
+
+ auto &whole_graph = lgraph.graph();
+ // Separate operands into partial graphs
+ whole_graph.operands().iterate([&](const ir::OperandIndex &operand_ind, ir::Operand &operand) {
+ auto &operand_li = lgraph.lower_info().operand;
+ const auto &def_factors = operand_li.at(operand_ind).def_factors();
+ if (def_factors.size() == 0) // Ignore unused tensor
+ return;
+ const auto &def_factor = def_factors.getOnlyElement();
+ const auto backend = def_factor.backend();
+ auto &partial_graph = *context_data_map[backend].graph;
+ auto &operand_layouts = context_data_map[backend].operand_layouts;
+ assert(operand_layouts.find(operand_ind) == operand_layouts.end());
+ operand_layouts[operand_ind] = def_factor.layout();
+
+ // Copy the operand and insert it to the partial graph
+ auto new_operand = std::make_unique<ir::Operand>(operand);
+ new_operand->clearDefUse();
+ operand.releaseData(); // Deref data of LoweredGraph
+ auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand));
+ UNUSED_RELEASE(new_operand_ind);
+ assert(new_operand_ind == operand_ind);
+ });
+ // Separate operations into partial graphs
+ whole_graph.operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::Operation &operation) {
+ auto &op_li = lgraph.lower_info().operation;
+ auto backend = op_li.at(op_ind).backend();
+ auto &partial_graph = *context_data_map[backend].graph;
+ auto &external_operands = context_data_map[backend].external_operands;
+ auto &operand_layouts = context_data_map[backend].operand_layouts;
+
+ {
+ // Add missing operands (externals)
+ auto io_list = (operation.getInputs() + operation.getOutputs()) | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
+ for (auto operand_ind : io_list)
+ {
+ if (partial_graph.operands().exist(operand_ind))
+ continue;
+
+ // Copy the operand and insert it to the partial graph
+ const auto &operand = whole_graph.operands().at(operand_ind);
+ auto new_operand = std::make_unique<ir::Operand>(operand);
+ new_operand->clearDefUse();
+ auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand));
+ UNUSED_RELEASE(new_operand_ind);
+ assert(new_operand_ind == operand_ind);
+
+ auto layout =
+ lgraph.lower_info().operand.at(operand_ind).def_factors().getOnlyElement().layout();
+ assert(operand_layouts.find(operand_ind) == operand_layouts.end());
+ operand_layouts[operand_ind] = layout;
+ external_operands.add(operand_ind);
+ }
+
+ auto new_op_ind = partial_graph.addOperation(op_ind, clone(operation));
+ UNUSED_RELEASE(new_op_ind);
+ assert(new_op_ind == op_ind);
+ }
+ });
+
+ // Create contexts
+ auto whole_op_order = lgraph.graph().topolSortOperations();
+ for (auto &pair : context_data_map)
+ {
+ auto backend = pair.first;
+ auto &data = pair.second;
+ // Handle graph input/outputs or external tensors
+ data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (whole_graph.getInputs().contains(ind) || whole_graph.getOutputs().contains(ind))
+ data.external_operands.add(ind);
+ // Inputs are either "graph input" or "no def op and non-constant"
+ if (whole_graph.getInputs().contains(ind) ||
+ (!operand.getDef().valid() && !operand.isConstant()))
+ data.graph->addInput(ind);
+ // Outputs are either "graph output" or "no uses"
+ if (whole_graph.getOutputs().contains(ind) || operand.getUses().size() == 0)
+ data.graph->addOutput(ind);
+ });
+ dumper::text::dumpGraph(*data.graph);
+
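+ // Keep only this backend's operations, preserving the whole graph's topological order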
+ std::copy_if(whole_op_order.begin(), whole_op_order.end(), std::back_inserter(data.op_order),
+ [&](const auto &ind) { return data.graph->operations().exist(ind); });
+ data.is_linear_executor = linear_executor;
+ data.custom_kernel_builder = lgraph.graph().getKernelBuilder();
+ contexts.emplace(backend, backend->newContext(std::move(data)));
+ }
+ return contexts;
+}
+
} // namespace
} // namespace onert
return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
}
-void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
-{
- struct Entry
- {
- std::vector<backend::BackendContext::OperationInfo> operation_list;
- std::vector<ir::OperandIndex> operand_list;
- };
- std::unordered_map<const backend::Backend *, Entry> backend_assets;
-
- // Build lists for operations
- lowered_graph->op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto &op_seq_li = lowered_graph->getLowerInfo()->op_seq;
- auto backend = op_seq_li.at(op_seq_index)->backend();
- for (auto &operation_idx : op_seq.operations())
- {
- backend_assets[backend].operation_list.emplace_back(operation_idx, op_seq.getLayout());
- }
- });
-
- // Build lists for operands
- lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
- const auto lower_info = lowered_graph->getLowerInfo(ind);
- for (auto factor : lower_info->def_factors())
- {
- auto backend = factor.backend();
- backend_assets[backend].operand_list.emplace_back(ind);
- }
- });
-
- for (auto &pair : backend_assets)
- {
- auto backend = pair.first;
- auto &arg = pair.second;
- lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list);
- }
-}
-
-void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph)
+void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts)
{
- TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
-
- lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
- auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
- for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
+ TensorRegistries tensor_regs{backend_contexts, true};
+
+ lowered_graph.graph().operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::Operation &op) {
+ auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind);
+ auto &backend_ctx = backend_contexts.at(lower_info->backend());
+ for (auto ind :
+ (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ {
+ // If an Operation's input/output tensor does not have its own tensor object,
+ // it must be using migrant tensors, so find the tensor from other tensor registries and
+ // register it to the current tensor registry if it is portable
+ if (!backend_ctx->tensor_registry->getITensor(ind))
{
- // If an OpSequence input/output tensor does not have a own tensor object,
- // it must be using migrant tensors, so find the tensor from other tensor builders and
- // set the tensor to this tensor builder if portable
- if (!backend_ctx->tensor_registry->getITensor(ind))
- {
- auto tensor = tensor_regs.getITensor(ind);
- assert(tensor); // The tensor must have been registered
- auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
- if (ptensor)
- backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
- }
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
+ auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
}
- });
+ }
+ });
}
exec::IExecutor *
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
- const auto &backend_contexts = lowered_graph->backend_contexts();
-
- initializeBackendContext(lowered_graph.get());
+ auto graph = lowered_graph->graph();
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
+ backend::BackendContexts backend_contexts =
+ createBackendContexts(*lowered_graph, options.executor == "Linear");
- assert(!lowered_graph->graph().isBuildingPhase());
+ TensorRegistries tensor_regs{backend_contexts, true};
initializeSubgraphIOTensors(
- *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
- ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+ *lowered_graph, backend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
// linearize
auto order = Linear::linearize(*lowered_graph);
for (auto &pair : backend_contexts)
{
- pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
+ pair.second->genTensors();
}
- prepareMigrantTensors(*lowered_graph);
+ prepareMigrantTensors(*lowered_graph, backend_contexts);
- // Give some runtime objects to controlflow KernelGenerator
+ // Give some runtime objects to builtin KernelGenerator
for (auto &pair : backend_contexts)
{
- auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
- if (cf_context != nullptr)
+ auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
{
- auto cf_kernel_gen = cf_context->kernel_gen;
- cf_kernel_gen->setTensorRegistries(tensor_regs);
- cf_kernel_gen->setExecutorMap(executor_map);
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setExecutorMap(executor_map);
}
}
std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
for (auto &pair : backend_contexts)
{
- // NOTE controlflow backend must be processed lastly.
+ // NOTE The builtin backend must be processed last.
// This is because of Permute layer's specialty which is the only operation that could have
// different ITensor objects for the input and the output. And it requires all other backends'
// tensors are ready to use.
- if (pair.first->config()->id() == "controlflow")
+ if (pair.first->config()->id() == "builtin")
ordered_contexts.emplace_back(pair.first, pair.second.get());
else
ordered_contexts.emplace_front(pair.first, pair.second.get());
}
+ // Simulate execution to plan when tensors can be deallocated
+ std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
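+ // Walk the linearized order keeping a remaining-use count per operand; when a count reaches
+ // zero after an operation (and the operand is not a constant, variable, or model I/O), the
+ // corresponding tensor is queued for deallocation right after that operation.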
+ {
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexSequence constants;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ uses_map[ind] = obj.getUses().size();
+
+ if (obj.isConstant())
+ constants.append(ind);
+ });
+
+ // Treat constants as an exception: bump their use count so they are never deallocated
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ }
+
+ for (const auto op_ind : order)
+ {
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ for (const auto &ind : op_inputs)
+ {
+ const auto &operand = graph.operands().at(ind);
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind))
+ {
+ dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind));
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ }
+
// Generate kernels
for (auto &pair : ordered_contexts)
{
- auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
+ auto codes = pair.second->genKernels();
for (auto &pair : codes)
{
- auto &op_seq_ind = pair.first;
+ auto &op_ind = pair.first;
auto &fn_seq = pair.second;
- auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
- auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
+ auto &op = lowered_graph->graph().operations().at(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
if (options.he_profiling_mode)
fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
- builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
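+ // Free dynamic tensors whose last use is this operation right after it runs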
+ if (!dealloc_list_map[op_ind].empty())
+ fn_seq->append(std::make_unique<DeallocFunction>(dealloc_list_map[op_ind]));
+ builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
}
}
auto code_map = builder.releaseCodeMap();
- auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
- order, options.tracing_ctx};
+ auto exec = new exec::LinearExecutor{
+ std::move(lowered_graph), std::move(backend_contexts), tensor_regs, std::move(code_map), order,
+ options.tracing_ctx};
if (!options.trace_filepath.empty())
{
std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
- options.trace_filepath, exec->graph(), options.tracing_ctx);
+ options.trace_filepath, exec->graph(), options.tracing_ctx);
exec->addObserver(std::move(ctp));
}
}
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
{
- const auto &backend_contexts = lowered_graph->backend_contexts();
+ backend::BackendContexts backend_contexts =
+ createBackendContexts(*lowered_graph, options.executor == "Linear");
- initializeBackendContext(lowered_graph.get());
-
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
-
- assert(!lowered_graph->graph().isBuildingPhase());
+ TensorRegistries tensor_regs{backend_contexts, true};
initializeSubgraphIOTensors(
- *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
- ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+ *lowered_graph, backend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
- // linearize
- // This order is just for giving topological order info to the backens
- // TODO When we pass a partial graph to a backend, we can remove this
- auto order = Linear::linearize(*lowered_graph);
for (auto &pair : backend_contexts)
{
- pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
+ pair.second->genTensors();
}
- prepareMigrantTensors(*lowered_graph);
+ prepareMigrantTensors(*lowered_graph, backend_contexts);
- // Give some runtime objects to controlflow KernelGenerator
+ // Give some runtime objects to builtin KernelGenerator
for (auto &pair : backend_contexts)
{
- auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
- if (cf_context != nullptr)
+ auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
{
- auto cf_kernel_gen = cf_context->kernel_gen;
- cf_kernel_gen->setTensorRegistries(tensor_regs);
- cf_kernel_gen->setExecutorMap(executor_map);
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setExecutorMap(executor_map);
}
}
std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
for (auto &pair : backend_contexts)
{
- // NOTE controlflow backend must be processed lastly.
+ // NOTE The builtin backend must be processed last.
// This is because of Permute layer's specialty which is the only operation that could have
// different ITensor objects for the input and the output. And it requires all other backends'
// tensors are ready to use.
- if (pair.first->config()->id() == "controlflow")
+ if (pair.first->config()->id() == "builtin")
ordered_contexts.emplace_back(pair.first, pair.second.get());
else
ordered_contexts.emplace_front(pair.first, pair.second.get());
// Generate kernels
for (auto &pair : ordered_contexts)
{
- auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
+ auto codes = pair.second->genKernels();
for (auto &pair : codes)
{
- auto &op_seq_ind = pair.first;
+ auto &op_ind = pair.first;
auto &fn_seq = pair.second;
- auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
- auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
+ auto &op = lowered_graph->graph().operations().at(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
if (options.he_profiling_mode)
fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
- builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
+ builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
}
}
exec::ExecutorBase *exec = nullptr;
if (parallel)
{
- exec = new exec::ParallelExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
- options.tracing_ctx};
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts),
+ tensor_regs, std::move(code_map), options.tracing_ctx};
}
else
{
- auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), tensor_regs,
- std::move(code_map), options.tracing_ctx};
+ auto dataflow_exec =
+ new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
+ std::move(code_map), options.tracing_ctx};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
}
auto et = std::make_shared<exec::ExecTime>(backends);
std::unique_ptr<exec::IExecutionObserver> obs =
- std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
+ std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
dataflow_exec->addObserver(std::move(obs));
}
exec = dataflow_exec;
if (!options.trace_filepath.empty())
{
std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
- options.trace_filepath, exec->graph(), options.tracing_ctx);
+ options.trace_filepath, exec->graph(), options.tracing_ctx);
exec->addObserver(std::move(ctp));
}
ExecutorFactory();
private:
- static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
- static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
- static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph);
+ static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts);
static exec::IExecutor *
createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
private:
std::unordered_map<std::string, std::function<exec::IExecutor *(
- std::unique_ptr<compiler::LoweredGraph>,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
- _map;
+ std::unique_ptr<compiler::LoweredGraph>,
+ const compiler::CompilerOptions &options,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+ _map;
};
} // namespace compiler
* limitations under the License.
*/
+#if 0 // This file is temporarily unused
+
#include "Fp32ToFp16Converter.h"
#include "ir/operation/ConvertFp32ToFp16.h"
#include "ir/operation/ConvertFp16ToFp32.h"
{
Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
- : _lowered_graph{lowered_graph}
+ : _lowered_graph{lowered_graph}
{
VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
}
void Fp32ToFp16Converter::appendOpSequences()
{
_lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
- assert(lower_info != nullptr);
-
- // For now, the only acl_cl supports fully fp16 type
- // TODO Support fp16 on acl_neon. Current acl_neon supports the only reshape and concat
- // operations.
- // To do this, we could check the support by `operation by operation`. After that, we
- // would partition an op_seq if it contains unsupported operations.
- if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
- return;
-
- // OpSeq's input set should be included in the first operation's input set or
- // OpSeq's output set should be included in the last operation's output set
- assert(checkOperandsOfOpSequence(op_seq));
-
- // Append converting OpSequence for fp16 but all operands' types are not fp16 still.
- appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
- appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
- });
+ [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
+ const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ assert(lower_info != nullptr);
+
+ // For now, only the acl_cl backend fully supports the fp16 type
+ // TODO Support fp16 on acl_neon. Currently acl_neon supports only the reshape and concat
+ // operations.
+ // To do this, we could check the support by `operation by operation`. After that, we
+ // would partition an op_seq if it contains unsupported operations.
+ if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
+ return;
+
+ // OpSeq's input set should be included in the first operation's input set or
+ // OpSeq's output set should be included in the last operation's output set
+ assert(checkOperandsOfOpSequence(op_seq));
+
+ // Append converting OpSequences for fp16; the operands' types are not converted to fp16 yet.
+ appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
+ appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
+ });
}
//
const auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
// set new lower_info for op_seq
- setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
+ setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind);
_list_fp32_to_fp16.insert(new_op_seq_ind);
auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
// set new lower_info for op_seq
- setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
+ setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind);
_list_fp16_to_fp32.insert(new_op_seq_ind);
void Fp32ToFp16Converter::convertOperands()
{
_lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
- assert(lower_info != nullptr);
- // For now, the only acl_cl supports fully fp16
- if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
- return;
-
- // Convert input,output operands' type to fp16
- convertOperandsOfOpSequence(op_seq);
- });
+ [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
+ const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ assert(lower_info != nullptr);
+ // For now, only the acl_cl backend fully supports fp16
+ if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
+ return;
+
+ // Convert input/output operands' types to fp16
+ convertOperandsOfOpSequence(op_seq);
+ });
}
void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Input Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Input Operand " << ind << ": fp16" << std::endl;
}
for (auto &ind : node.getOutputs())
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Output Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Output Operand " << ind << ": fp16" << std::endl;
}
}
}
obj.data(std::move(new_data));
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Constant Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Constant Operand " << ind << ": fp16" << std::endl;
}
});
}
{
const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
- auto new_lower_info = std::make_unique<ir::operand::LowerInfo>();
- auto permute_factor = ir::operand::PermuteFactor(lower_info->backend(), lower_info->layout());
+ auto new_lower_info = std::make_unique<compiler::OperandLowerInfo>();
+ auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout());
new_lower_info->addDefPermuteFactor(permute_factor);
new_lower_info->addUsePermuteFactor(permute_factor);
_lowered_graph.setLowerInfo(new_op_ind, std::move(new_lower_info));
}
-void Fp32ToFp16Converter::setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
- const ir::OpSequenceIndex &new_op_seq_ind)
+void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
+ const ir::OpSequenceIndex &new_op_seq_ind)
{
const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto new_lower_info =
- std::make_unique<ir::operation::LowerInfo>(lower_info->backend(), lower_info->layout());
+ std::make_unique<compiler::OperationLowerInfo>(lower_info->backend(), lower_info->layout());
_lowered_graph.setLowerInfo(new_op_seq_ind, std::move(new_lower_info));
}
auto &new_op_obj = operands.at(new_op_ind);
std::unique_ptr<ir::Operation> new_node(
- new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
+ new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
const auto new_node_ind = operations.push(std::move(new_node));
input_obj.insertUse(new_node_ind);
auto &new_op_obj = operands.at(new_op_ind);
std::unique_ptr<ir::Operation> new_node(
- new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
+ new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
const auto new_node_ind = operations.push(std::move(new_node));
new_op_obj.insertUse(new_node_ind);
opseq_map_to_delete[op_seq_ind_fp16_to_fp32].insert(op_seq_ind);
}
- VERBOSE(Fp32ToFp16Converter)
- << "Contiguous from OpSeq#" << op_seq_ind_fp16_to_fp32.value() << "(ToFp32)"
- << " to OpSeq#" << op_seq_ind.value() << "(ToFp16)" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Contiguous from " << op_seq_ind_fp16_to_fp32 << "(ToFp32)"
+ << " to " << op_seq_ind << "(ToFp16)" << std::endl;
}
}
}
}
void Fp32ToFp16Converter::manipulateContiguousOpSequences(
- const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
+ const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
{
auto &op_seqs = _lowered_graph.op_seqs();
}
void Fp32ToFp16Converter::deleteContiguousOpSequences(
- const OpSeqIndexList &list_to_delete_op_seqs,
- const ir::OperandIndexSequence &list_to_delete_ops)
+ const OpSeqIndexList &list_to_delete_op_seqs, const ir::OperandIndexSequence &list_to_delete_ops)
{
auto &operands = _lowered_graph.graph().operands();
auto &operations = _lowered_graph.graph().operations();
{
auto &op_seq = op_seqs.at(op_seq_ind);
assert(op_seq.size() == 1);
- VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq #" << op_seq_ind.value() << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq " << op_seq_ind << std::endl;
auto &first_node_ind = op_seq.operations().at(0);
auto &first_node = operations.at(first_node_ind);
assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
- VERBOSE(Fp32ToFp16Converter) << "Delete Node #" << first_node_ind.value() << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Delete Node " << first_node_ind << std::endl;
// Uses
for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
obj.removeUse(first_node_ind);
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Use(Node#"
- << first_node_ind.value() << ") is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter)
+ << "Operand " << ind << "'s Use(Node" << first_node_ind << ") is removed" << std::endl;
}
// Def
auto &obj = operands.at(ind);
assert(obj.getDef() == first_node_ind);
obj.unsetDef();
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Def(Node#"
- << first_node_ind.value() << ") is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter)
+ << "Operand " << ind << "'s Def(Node" << first_node_ind << ") is removed" << std::endl;
}
// Operation
operations.remove(first_node_ind);
- VERBOSE(Fp32ToFp16Converter) << "Node#" << first_node_ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Node" << first_node_ind << " is removed" << std::endl;
// OpSequence
op_seqs.remove(op_seq_ind);
- VERBOSE(Fp32ToFp16Converter) << "OpSeq#" << op_seq_ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "OpSeq" << op_seq_ind << " is removed" << std::endl;
}
// Operand
for (auto &ind : list_to_delete_ops)
{
operands.remove(ind);
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Operand " << ind << " is removed" << std::endl;
}
}
} // namespace compiler
} // namespace onert
+
+#endif
* limitations under the License.
*/
+#if 0 // This file is temporarily unused
+
#ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
#define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
void setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &new_op_ind);
- void setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
- const ir::OpSequenceIndex &new_op_seq_ind);
+ void setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
+ const ir::OpSequenceIndex &new_op_seq_ind);
void manipulateInput(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &op_seq_input_ind,
} // namespace onert
#endif // __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
+
+#endif
void HEScheduler::scheduleShufflingBackends()
{
VERBOSE(HEScheduler::schedule)
- << "Started task scheduling: uses all backends to get more metrics for data transfer"
- << std::endl;
+ << "Started task scheduling: uses all backends to get more metrics for data transfer"
+ << std::endl;
size_t backend_ind = 0;
for (const auto &rank : _rank_to_op)
{
- VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second.value() << ")" << std::endl;
+ VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second << ")" << std::endl;
const auto &node = _graph->operations().at(rank.second);
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
continue;
}
const auto exec_time =
- _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
+ _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
// Scheduling to measure data transfer must be done after measuring all backends separately
assert(exec_time != _exec_time->NOT_FOUND);
if (exec_time == _exec_time->getMax())
ir::OperationIndexMap<bool> visited;
graph.operations().iterate(
- [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; });
+ [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; });
// for each task select the backend with the smallest earliest finishing time(eft)
for (const auto &rank : _rank_to_op)
{
if (!_is_profiling_mode)
{
VERBOSE(HEScheduler::tryBackend)
- << "Trying to HE schedule while there is no profiling info for " << node.name()
- << " on backend " << backend->config()->id() << ". So this backend won't be used. "
- << std::endl;
+ << "Trying to HE schedule while there is no profiling info for " << node.name()
+ << " on backend " << backend->config()->id() << ". So this backend won't be used. "
+ << std::endl;
_is_supported[backend][node.name()] = false;
return _exec_time->getMax();
}
VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl;
_graph->operations().iterate(
- [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); });
+ [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); });
// Check that ranks are calculated for all operations(nodes)
_graph->operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
assert(rank >= 0);
_rank_to_op.emplace(rank, index);
_op_to_rank->emplace(index, rank);
- VERBOSE(HEScheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.name()
- << " is " << rank << std::endl;
+ VERBOSE(HEScheduler::DFSMaxRank)
+ << "rank of operation (" << index << ")" << node.name() << " is " << rank << std::endl;
return rank;
}
{
continue;
}
- // TODO Change it to controlflow backend
+ // TODO Change it to builtin backend
auto transfer_cost =
- getPermuteTime(backend, other_backend, quant, operand.info().total_size());
+ getPermuteTime(backend, other_backend, quant, operand.info().total_size());
avg_transfer_cost += transfer_cost;
}
}
bool HEScheduler::schedule(const ir::OperationIndex &index, const backend::Backend *parent_backend)
{
- VERBOSE(HEScheduler::schedule) << "scheduling (" << index.value() << ")" << std::endl;
+ VERBOSE(HEScheduler::schedule) << "scheduling (" << index << ")" << std::endl;
int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
const auto &node = _graph->operations().at(index);
if (!_is_parallel_exec)
{
VERBOSE(HEScheduler::ESTAndExecTime)
- << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
- << backend->config()->id() << " is " << exec_time
- << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
+ << "exec_time of (" << index << ") " << node.name() << " quant==" << quant << " on "
+ << backend->config()->id() << " is " << exec_time
+ << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
return {total_transfer_cost, exec_time};
}
VERBOSE(HEScheduler::ESTAndExecTime)
- << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
- << backend->config()->id() << ": " << exec_time
- << " microseconds. Backend available time: " << prev_op_ft
- << " Parent's max eft: " << max_pred_eft - total_transfer_cost
- << " data transfer cost: " << total_transfer_cost << std::endl;
+ << "exec_time of (" << index << ") " << node.name() << " quant==" << quant << " on "
+ << backend->config()->id() << ": " << exec_time
+ << " microseconds. Backend available time: " << prev_op_ft
+ << " Parent's max eft: " << max_pred_eft - total_transfer_cost
+ << " data transfer cost: " << total_transfer_cost << std::endl;
return {prev_op_ft, exec_time};
}
{
// Multiply operand size by 2 because size must describe input+output size
int64_t transfer_cost =
- getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
+ getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
}
}
* @param[in] model Graph model
* @param[in] backend_resolver backend resolver
*/
- HEScheduler(const backend::BackendContexts &backend_contexts, const CompilerOptions &options)
- : _is_supported{}, _backends_avail_time{}, _ops_eft{},
- _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
- _is_profiling_mode{options.he_profiling_mode},
- _is_linear_exec{options.executor == "Linear"},
- _is_parallel_exec{options.executor == "Parallel"}
+ HEScheduler(const std::vector<const backend::Backend *> &backends, const CompilerOptions &options)
+ : _is_supported{}, _backends_avail_time{}, _ops_eft{},
+ _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
+ _is_profiling_mode{options.he_profiling_mode}, _is_linear_exec{options.executor == "Linear"},
+ _is_parallel_exec{options.executor == "Parallel"}
{
- for (auto &entry : backend_contexts)
+ for (auto entry : backends)
{
- if (entry.first->config()->id() == backend::controlflow::Config::ID)
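+ // Skip the builtin backend: it is not a candidate for HE scheduling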
+ if (entry->config()->id() == backend::builtin::Config::ID)
continue;
- _all_backends.push_back(entry.first);
+ _all_backends.push_back(entry);
}
_backend_resolver = std::make_unique<compiler::BackendResolver>();
_exec_time = std::make_unique<exec::ExecTime>(_all_backends);
// Find cpu backend
- auto cpu_backend_it = std::find_if(
- _all_backends.begin(), _all_backends.end(),
- [](const backend::Backend *backend) { return backend->config()->id() == "cpu"; });
+ auto cpu_backend_it =
+ std::find_if(_all_backends.begin(), _all_backends.end(), [](const backend::Backend *backend) {
+ return backend->config()->id() == "cpu";
+ });
if (cpu_backend_it == _all_backends.end())
throw std::runtime_error("HEScheduler could be used only if 'cpu' backend is available");
_cpu_backend = *cpu_backend_it;
std::unique_ptr<exec::ExecTime> _exec_time;
const ir::Graph *_graph{nullptr};
std::vector<const backend::Backend *> _all_backends;
- const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to controlflow_backend
+ const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to _builtin_backend
bool _is_profiling_mode;
bool _is_linear_exec;
bool _is_parallel_exec;
*/
#include <algorithm>
+#include <sstream>
#include "Linear.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
#include "util/logging.h"
+#include "dumper/text/GraphDumper.h"
namespace onert
{
namespace compiler
{
-std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
+// TODO(easy) Change the LoweredGraph param to Graph
+std::vector<ir::OperationIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
{
- std::vector<ir::OpSequenceIndex> order;
- lowered_graph.iterateTopolOpSeqs(
- [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) -> void {
- order.emplace_back(index);
- });
- return order;
+ return lowered_graph.graph().topolSortOperations();
}
+// TODO(easy) Change the LoweredGraph param to Graph
void Linear::dump(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
+ const std::vector<ir::OperationIndex> &order)
{
+ for (const auto ind : order)
{
- const auto &toString = [](const onert::backend::Backend *backend) {
- assert(backend);
- std::string str;
- str += backend->config()->id();
- return "{" + str + "}";
- };
-
- VERBOSE(Linear) << "Final OpSequence" << std::endl;
- for (const auto index : order)
- {
- const auto &op_seq = lowered_graph.op_seqs().at(index);
- const auto lower_info = lowered_graph.getLowerInfo(index);
- const auto &operations = lowered_graph.graph().operations();
- VERBOSE(Linear) << "* OP_SEQ " << toString(lower_info->backend()) << " "
- << ir::getStrFromOpSeq(op_seq, operations) << std::endl;
- }
+ // TODO Could the logging system handle this? (inserting a prefix for each line)
+ std::istringstream iss{dumper::text::formatOperation(lowered_graph.graph(), ind)};
+ std::string line;
+ while (std::getline(iss, line))
+ VERBOSE(GraphDumper) << line << std::endl;
}
}
#include <vector>
#include <memory>
-#include "ir/OpSequences.h"
#include "ir/Index.h"
#include "compiler/LoweredGraph.h"
-namespace onert
-{
-namespace ir
-{
-struct OperationVisitor;
-} // namespace ir
-} // namespace onert
-
namespace onert
{
namespace compiler
class Linear
{
public:
- static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
+ static std::vector<ir::OperationIndex> linearize(const compiler::LoweredGraph &lowered_graph);
static void dump(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
+ const std::vector<ir::OperationIndex> &order);
};
} // namespace compiler
#include "compiler/LoweredGraph.h"
#include <assert.h>
+#include <algorithm>
#include <sstream>
#include "util/logging.h"
#include "compiler/pass/ConstantInsertionPass.h"
#include "compiler/pass/PermutationOperationPass.h"
#include "compiler/pass/PermutationInsertionPass.h"
#include "compiler/pass/PermutationEliminationPass.h"
-#include "ir/GraphIterator.h"
+#include "dumper/text/GraphDumper.h"
#include "ir/verifier/Verifier.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value());
}
- bool linear_executor = (options.executor == "Linear");
-
// Build backend contexts
auto &backend_manager = BackendManager::get();
-
- // Always create Controlflow backend context
- auto cf_backend = backend_manager.getControlflow();
- _backend_contexts.emplace(
- cf_backend, cf_backend->newContext(_graph, _graph.getKernelBuilder(), linear_executor));
-
// Create contexts for other backends
for (auto backend_str : options.backend_list)
{
VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str << std::endl;
continue;
}
-
- _backend_contexts.emplace(
- backend, backend->newContext(_graph, _graph.getKernelBuilder(), linear_executor));
}
if (backend_manager.num_backends() == 0)
throw std::runtime_error{"No available backends loaded."};
// TODO Move "schedule" phase out of here
// Schedule
std::unique_ptr<BackendResolver> backend_resolver;
+ auto all_backends = backend_manager.getAll();
if (options.he_scheduler)
{
- auto scheduler = HEScheduler(_backend_contexts, options);
+ auto scheduler = HEScheduler(all_backends, options);
backend_resolver = scheduler.schedule(_graph);
_indexed_ranks = scheduler.getIndexedRanks();
}
else
{
- auto scheduler = ManualScheduler(_backend_contexts, options);
+ auto scheduler = ManualScheduler(all_backends, options);
backend_resolver = scheduler.schedule(_graph);
}
- {
- // operand::LowerInfo holder
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info;
-
- _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
- operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>();
- });
-
- // Make op_seqs while checking whether a node can be merged into a op_seq.
- makeOpSequences(operands_lower_info, options, *backend_resolver);
-
- _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- assert(op_seq.operations().size() > 0);
- std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
- });
+ makeLowerInfo(*backend_resolver);
+ VERBOSE(LoweredGraph) << "Dump before mandatory passes" << std::endl;
+ dumper::text::dumpLoweredGraph(*this);
- VERBOSE(OpSequences) << "dump before permutation insertion" << std::endl;
- dumpOpSequences(_op_seqs, _graph.operations());
-
- // Mandatory passes
- pass::PassRunner{}
- .append(std::make_unique<pass::ConstantInsertionPass>(*this))
- .append(std::make_unique<pass::ConstantLoweringPass>(*this))
- .run();
-
- // Set LowerInfo for each operand from the operand::LowerInfo holder
- manipulateLowerInfo(operands_lower_info);
-
- dumpLowerInfo();
- }
-
- // Mandatory passes
+ // Mandatory passes (roughly, a legalization step)
pass::PassRunner{}
- .append(std::make_unique<pass::PermutationOperationPass>(*this))
- .append(std::make_unique<pass::PermutationInsertionPass>(*this))
- .run();
+ .append(std::make_unique<pass::ConstantInsertionPass>(*this))
+ .append(std::make_unique<pass::ConstantLoweringPass>(*this))
+ .append(std::make_unique<pass::PermutationOperationPass>(*this))
+ .append(std::make_unique<pass::PermutationInsertionPass>(*this))
+ .run();
+
+ dumpLowerInfo();
- // Optimization passes
+ // Optimization passes (optional)
pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run();
- VERBOSE(LoweredGraph) << "Dump after permutation insertion" << std::endl;
+ VERBOSE(LoweredGraph) << "Dump after all the passes" << std::endl;
for (auto operand : _graph.getInputs())
VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl;
for (auto operand : _graph.getOutputs())
VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl;
- dumpOpSequences(_op_seqs, _graph.operations());
+ dumper::text::dumpLoweredGraph(*this);
// Graph verifications
{
assert(ir::verifier::InputOutputChecker().verify(_graph));
assert(ir::verifier::DAGChecker().verify(_graph));
- assert(ir::verifier::EdgeConsistencyChecker().verify(_graph));
+ assert(ir::verifier::EdgeChecker().verify(_graph));
}
}
-const ir::operation::LowerInfo *
-LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const
-{
- auto itr = _lower_info_map.op_seq.find(op_seq_index);
- if (itr == _lower_info_map.op_seq.end())
- return nullptr;
- return itr->second.get();
-}
-
-void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
- std::unique_ptr<ir::operation::LowerInfo> &&lower_info)
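+// Build operand/operation LowerInfo from the backend assignment held by the given BackendResolver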
+void LoweredGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolver)
{
- _lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info)));
-}
+ _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ lower_info().operand.set(index, std::make_unique<OperandLowerInfo>());
+ });
-void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index)
-{
- auto &op_seq_lower_info = _lower_info_map.op_seq;
- assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
- for (auto it = op_seq_lower_info.begin(); it != op_seq_lower_info.end(); ++it)
- {
- if (it->first == op_seq_index)
+ // Set operand LowerInfo using the backends assigned to each operation
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, const ir::Operation &op) {
+ auto backend = backend_resolver.getBackend(op_ind);
+ if (!backend)
{
- op_seq_lower_info.erase(it);
- break;
+ throw std::runtime_error{"Failed to find backend for " + op.name() + " operation"};
}
- }
-}
-
-const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const
-{
- auto itr = _lower_info_map.operand.find(index);
- if (itr == _lower_info_map.operand.end())
- return nullptr;
- return itr->second.get();
-}
-
-ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index)
-{
- auto itr = _lower_info_map.operand.find(index);
- if (itr == _lower_info_map.operand.end())
- return nullptr;
- return itr->second.get();
-}
-
-void LoweredGraph::setLowerInfo(const ir::OperandIndex &index,
- std::unique_ptr<ir::operand::LowerInfo> &&lower_info)
-{
- _lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
-}
-
-void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index)
-{
- _lower_info_map.operand.erase(index);
-}
-
-void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const
-{
- // Topological Sorting for ir::OpSequences
- std::vector<ir::OpSequenceIndex> topol_sorted;
- ir::PostDfsIterator<true>{}.iterateOpSeqs(
- *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) {
- topol_sorted.emplace_back(index);
- });
- std::reverse(topol_sorted.begin(), topol_sorted.end());
- for (const auto op_seq_idx : topol_sorted)
- {
- const auto &op_seq = _op_seqs.at(op_seq_idx);
- fn(op_seq_idx, op_seq);
- }
-}
-
-void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn)
-{
- // Topological Sorting for ir::OpSequences
- std::vector<ir::OpSequenceIndex> topol_sorted;
- ir::PostDfsIterator<false>{}.iterateOpSeqs(
- *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) {
- topol_sorted.emplace_back(index);
- });
- std::reverse(topol_sorted.begin(), topol_sorted.end());
- for (const auto op_seq_idx : topol_sorted)
- {
- auto &op_seq = _op_seqs.at(op_seq_idx);
- fn(op_seq_idx, op_seq);
- }
-}
-
-ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
- const ir::Operation &node)
-{
- // Create a fresh op_seq with one operation, and append it to op_seqs
- // Create a fresh op_seq
- auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout());
-
- // Add an operation
- op_seq->appendOperation(node_index);
-
- // Update input/output
- op_seq->setOutputs(node.getOutputs());
- op_seq->setInputs(node.getInputs());
-
- return _op_seqs.emplace(std::move(op_seq));
-}
-
-void LoweredGraph::makeOpSequences(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- const CompilerOptions &options, const BackendResolver &backend_resolver)
-{
- // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
- const int op_seq_max_node = options.op_seq_max_node;
- assert(op_seq_max_node >= 0);
-
- bool is_profiling = options.he_profiling_mode;
- ir::OpSequence *op_seq = nullptr;
- ir::OpSequenceIndex op_seq_index;
-
- // NOTE: The below method appends nodes while making one op_seq if needed. If something better
- // ways, happy to update this code.
- ir::PostDfsConstIterator{}.iterate(
- _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) {
- // LowerInfo for in/output operands
- auto backend = backend_resolver.getBackend(node_index);
-
- // Get frontend's layout
- auto frontend_layout = _graph.layout();
-
- // The layout of each backend should be set at another place
- // TODO Change setting layout of each backend at another place
- auto backend_layout = backend->config()->supportLayout(node, frontend_layout);
-
- for (auto operand : node.getInputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
- }
- for (auto operand : node.getOutputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
- }
-
- bool new_op_seq = (op_seq == nullptr ||
- (op_seq_max_node != 0 &&
- op_seq->operations().size() >= static_cast<size_t>(op_seq_max_node)));
- // for profiling each op_seq must contain just one node,
- // so that we can measure a node separately
- if (new_op_seq || is_profiling ||
- !mergeable(op_seq_index, node_index, backend_layout, backend_resolver))
- {
- auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);
-
- // ir::OpSequence LowerInfo
- setLowerInfo(new_op_seq_index,
- std::make_unique<ir::operation::LowerInfo>(backend, backend_layout));
-
- op_seq_index = new_op_seq_index;
- op_seq = &(_op_seqs.at(new_op_seq_index));
-
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " is created for "
- << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
- else
- {
- op_seq->appendOperation(node_index);
- // Set inputs
- auto new_inputs = node.getInputs();
- // Add inputs except outputs of the previous node
- for (auto ind : op_seq->getInputs())
- {
- if (!node.getOutputs().contains(ind))
- new_inputs.append(ind);
- }
- op_seq->setInputs(new_inputs);
+ auto frontend_layout = _graph.layout();
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " merges "
- << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
- });
-}
+ // The layout of each backend should be set at another place
+ // TODO Change setting layout of each backend at another place
+ auto backend_layout = backend->config()->supportLayout(op, frontend_layout);
-void LoweredGraph::manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info)
-{
- const auto controlflow_backend = BackendManager::get().getControlflow();
+ for (auto ind : op.getInputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addUsePermuteFactor(PermuteFactor{backend, backend_layout});
+ }
+ for (auto ind : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addDefPermuteFactor(PermuteFactor{backend, backend_layout});
+ }
+ lower_info().operation.set(
+ op_ind, std::make_unique<compiler::OperationLowerInfo>(backend, backend_layout));
+ });
- // TODO Rather than using NHWC Get frontend layout of this node from IR
- auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
+ // Handle graph inputs and outputs
+ const auto builtin_backend = BackendManager::get().getBuiltin();
+ auto factor = PermuteFactor{builtin_backend, _graph.layout()};
for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
{
- auto &&lower_info = operands_lower_info.at(index);
- assert(lower_info->def_factors().empty());
- lower_info->addDefPermuteFactor(factor);
- }
- for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(index);
- lower_info->addUsePermuteFactor(factor);
+ auto &operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(factor);
}
for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
{
- auto &&lower_info = operands_lower_info.at(index);
- if (lower_info->def_factors().size() == 0)
- {
- // In case of that an operand is Graph's output and not input or output of any operation
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
- controlflow_backend,
- ir::Layout::NHWC // TODO Get frontend layout of this node from IR
- });
- }
+ auto &operand_li = lower_info().operand.at(index);
+ operand_li.addUsePermuteFactor(factor);
}
- // 1. Add def of variable operand
- // 2. Set LowerInfo for each operand from the operand::LowerInfo holder
+ // Handle variable tensors
_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &operand) {
// Some inputs of an operation could be non-constant, but not existed in graph inputs/outputs
- // and not undefined operand. Those inputs must have exist as a Tensor. For example,
- // UnidirectionalSequenceLSTM operation could have state inputs such as it.
+ // and not undefined operand - these are variable tensors. For example,
+ // UnidirectionalSequenceLSTM has such inputs.
if (operand.info().isVariable())
{
// The variable operand with buffer is not supported yet
assert(operand.data() == nullptr);
assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- auto &lowered_info = operands_lower_info[index];
- assert(lowered_info->def_factors().empty());
- lowered_info->addDefPermuteFactor(lowered_info->use_factors().getOnlyElement());
+ auto &operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(operand_li.use_factors().getOnlyElement());
}
-
- setLowerInfo(index, std::move(operands_lower_info[index]));
});
}
std::map<uint32_t, std::string> dumps;
_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
- std::stringstream sstream;
- if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
+ const auto operand_lower_info = lower_info().operand.getRawPtr(index);
+ assert(operand_lower_info);
+ if (!operand_lower_info->def_factors().empty() || !operand_lower_info->use_factors().empty())
{
- auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) {
+ auto shape_to_string = [](const ir::Shape &shape) {
+ std::stringstream sstream;
+ sstream << "{ ";
+ for (auto i = 0; i < shape.rank(); ++i)
+ sstream << (shape.dim(i)) << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto factors_to_string = [](const PermuteFactorSet &factors) {
std::string str;
for (auto factor : factors)
{
return "{ " + str + "}";
};
- auto operation_index_to_string = [](const ir::OperationIndexSet &operations) {
- std::string str;
+ auto operation_index_set_to_string = [](const ir::OperationIndexSet &operations) {
+ std::stringstream sstream;
+ sstream << "{ ";
for (auto op : operations)
- {
- str += std::to_string(op.value());
- str += " ";
- }
- return "{ " + str + "}";
+ sstream << op << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto data_to_str = [](const ir::Data *data) {
+ return (data ? (std::to_string(data->size()) + " bytes") : "N/A");
};
- const auto lower_info = getLowerInfo(index);
- const auto &shape = object.shape();
- std::string def_ops =
- object.getDef().valid() ? std::to_string(object.getDef().value()) : "N/A";
- std::string use_ops = operation_index_to_string(object.getUses());
- std::string def_layouts = factors_to_string(lower_info->def_factors());
- std::string use_layouts = factors_to_string(lower_info->use_factors());
- sstream << "Operand #" << index.value() << " LowerInfo" << std::endl;
- sstream << " - Shape : { ";
- for (auto i = 0; i < shape.rank(); ++i)
- {
- sstream << (shape.dim(i)) << " ";
- }
- sstream << "}" << std::endl;
- sstream << " - Def Operations : " << def_ops << std::endl;
- sstream << " - Use Operations : " << use_ops << std::endl;
- sstream << " - Data : "
- << (object.data() ? (std::to_string(object.data()->size()) + " bytes") : "N/A")
- << std::endl;
- sstream << " - Lower Info" << std::endl;
- sstream << " - Def Backends : " << def_layouts << std::endl;
- sstream << " - Use Backends : " << use_layouts << std::endl;
+ std::string shape_str = shape_to_string(object.shape());
+ std::string def_op = operation_index_set_to_string({object.getDef()});
+ std::string use_ops = operation_index_set_to_string(object.getUses());
+ std::string def_factors = factors_to_string(operand_lower_info->def_factors());
+ std::string use_factors = factors_to_string(operand_lower_info->use_factors());
+ std::stringstream sstream;
+ sstream << "Operand " << index << " Info" << std::endl;
+ sstream << " - Shape : " << shape_str << std::endl;
+ sstream << " - Def/Uses : Def " << def_op << " Uses " << use_ops << std::endl;
+ sstream << " - Data : " << data_to_str(object.data()) << std::endl;
+ sstream << " - LowerInfo : Def " << def_factors << " Uses " << use_factors << std::endl;
+ dumps.emplace(index.value(), sstream.str());
}
- dumps.emplace(index.value(), sstream.str());
});
for (const auto &e : dumps)
{
if (!e.second.empty())
{
- VERBOSE(Lower) << e.second;
- }
- }
-}
-
-bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
- const ir::OperationIndex &node_index, ir::Layout layout,
- const BackendResolver &backend_resolver)
-{
- // Are they mergeable?
- // 1. the same backend id and layout?
- // 2. Is op_seq or node branched?
- // 3. if 1 is true, the op_seq and a node are connected?
- const auto &op_seq = _op_seqs.at(op_seq_index);
- const auto &node = _graph.operations().at(node_index);
-
- // The same backend id and layout?
- {
- const auto op_seq_backend_layout = getLowerInfo(op_seq_index)->layout();
- const auto &op_seq_backend_id = getLowerInfo(op_seq_index)->backend()->config()->id();
- const auto &node_backend_id = backend_resolver.getBackend(node_index)->config()->id();
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " { " << op_seq_backend_id << "("
- << to_string(op_seq_backend_layout) << ") } "
- << " NODE#" << node_index.value() << " (" << node.name() << ") { "
- << node_backend_id << "(" << to_string(layout) << ") } " << std::endl;
- if (op_seq_backend_id != node_backend_id || op_seq_backend_layout != layout)
- return false;
- }
-
- // Branched?
- {
- std::unordered_set<ir::OperationIndex> branched_set;
-
- // Check for branching up
- for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
- {
- const auto &input_obj = _graph.operands().at(input);
- auto def = input_obj.getDef();
- if (def.valid())
- {
- branched_set.insert(def);
- if (branched_set.size() > 1)
- {
- return false;
- }
- }
- }
- branched_set.clear();
-
- // Check for branching down
- for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
- {
- // TODO Fix this workaround for the case of model outputs that are used by another operation
- // This is needed since the branching is decided by operation, but for model outputs,
- // there is controlflow backen(use backend) but no actual use operation exists
- if (_graph.getOutputs().contains(output))
- return false;
-
- const auto &output_obj = _graph.operands().at(output);
- for (const auto &use : output_obj.getUses())
- {
- branched_set.insert(use);
- if (branched_set.size() > 1)
- {
- return false;
- }
- }
- }
- }
-
- // Connected?
- // an input of one node is an output of the other node? or vice-versa?
- {
- const auto &node_inputs = node.getInputs();
- const auto &node_outputs = node.getOutputs();
-
- // op_seq's operations are in order so that we just check the first and the last
- std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]};
- if (op_seq.operations().size() > 1)
- op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
-
- for (const auto &n_index : op_seq_ops)
- {
- const auto &n = _graph.operations().at(n_index);
-
- // node's output == op_seq's input?
- for (const auto input : n.getInputs() | ir::Remove::UNDEFINED)
- {
- if (node_outputs.contains(input))
- {
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " 's NODE#" << n_index.value()
- << "(" << n.name() << ") is connected to NODE#" << node_index.value()
- << "(" << node.name() << ")" << std::endl;
- return true;
- }
- }
-
- // node's input == op_seq's output?
- for (const auto output : n.getOutputs() | ir::Remove::UNDEFINED)
- {
- if (node_inputs.contains(output))
- {
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " 's NODE#" << n_index.value()
- << " (" << n.name() << ") is connected to NODE#" << node_index.value()
- << std::endl;
- return true;
- }
- }
+ std::istringstream iss(e.second);
+ std::string line;
+ while (std::getline(iss, line))
+ VERBOSE(Lower) << line << std::endl;
}
-
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " is not connected to NODE#"
- << node_index.value() << "(" << node.name() << ")" << std::endl;
}
-
- return false;
}
} // namespace compiler
namespace compiler
{
-ManualScheduler::ManualScheduler(const backend::BackendContexts &backend_contexts,
+ManualScheduler::ManualScheduler(const std::vector<const backend::Backend *> &backends,
const compiler::CompilerOptions &options)
- : _backend_contexts{backend_contexts}, _options{options}
+ : _backends{backends}, _options{options}
{
}
try
{
graph.operations().at(key); // Check if exist, or this will throw
- backend_resolver->setBackend(
- key, BackendManager::get().get(
- val)); // TODO Ensure this backend is available in backend contexts
+ backend_resolver->setBackend(key, BackendManager::get().get(val));
}
catch (...)
{
- VERBOSE(ManualScheduler) << "Invalid value while OperationIndex to Backend mapping : @"
- << key.value() << " -> \"" << val << "\"" << std::endl;
+ VERBOSE(ManualScheduler) << "Invalid value in OperationIndex-to-Backend mapping : @" << key
+ << " -> \"" << val << "\"" << std::endl;
}
}
// Dump final assignment
WHEN_LOG_ENABLED(backend_resolver->iterate(
- [&](const ir::OperationIndex &index, const backend::Backend &backend) {
- VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": "
- << backend.config()->id() << std::endl;
- }));
+ [&](const ir::OperationIndex &index, const backend::Backend &backend) {
+ VERBOSE(ManualScheduler) << "backend for " << index << ": " << backend.config()->id()
+ << std::endl;
+ }));
return backend_resolver;
}
{
// Ensure if the backend is available in the current backend context
const backend::Backend *backend = BackendManager::get().get(id);
- if (!backend || _backend_contexts.find(backend) == _backend_contexts.end())
+ if (!backend || std::find(_backends.begin(), _backends.end(), backend) == _backends.end())
{
backend = fallback;
}
class ManualScheduler : public IScheduler
{
public:
- ManualScheduler(const backend::BackendContexts &backend_contexts,
+ ManualScheduler(const std::vector<const backend::Backend *> &backends,
const compiler::CompilerOptions &options);
std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) override;
const backend::Backend *fallback = nullptr);
private:
- const backend::BackendContexts &_backend_contexts;
+ std::vector<const backend::Backend *> _backends;
compiler::CompilerOptions _options;
};
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/OperationLowerInfo.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+OperationLowerInfo::OperationLowerInfo(const backend::Backend *backend, ir::Layout layout)
+ : _permute_factor{backend, layout}
+{
+ // DO NOTHING
+}
+
+} // namespace compiler
+} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ParamChecker.h"
-
-#include "ir/Graph.h"
-
-namespace onert
-{
-namespace compiler
-{
-
-void ParamChecker::operator()()
-{
- _model->operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
-}
-
-} // namespace compiler
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ParamChecker.h
- * @brief This file contains ParamChecker to check\n
- * operations' parameters are compilable at machine independent phase\n
- * ex) Check param is constant
- */
-#ifndef __ONERT_COMPILER_PARAM_CHECKER_H__
-#define __ONERT_COMPILER_PARAM_CHECKER_H__
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-class Graph;
-} // namespace ir
-} // namespace onert
-
-namespace onert
-{
-namespace compiler
-{
-
-class ParamChecker : public ir::OperationVisitor
-{
-public:
- /**
- * @brief Construct a new Param Checker object (deleted)
- */
- ParamChecker(void) = delete;
- /**
- * @brief Construct a new Param Checker object
- * @param[in] model Graph model to check
- */
- ParamChecker(std::shared_ptr<ir::Graph> model) : _model{model} {}
-
-public:
- /**
- * @brief Run parameter analysis
- */
- void operator()();
- /**
- * @brief Return analysis result if model have non-const parameter
- * @return @c true if there is non-const parameter, otherwise @c false
- */
- bool haveNoneConstParam(void) { return _nonConstParam; }
-
-private:
- const std::shared_ptr<ir::Graph> _model;
- bool _nonConstParam{false};
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_OPERATION_VALIDATOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/PermuteFactor.h"
+
+#include <assert.h>
+#include <ostream>
+
+#include "backend/Backend.h"
+
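+// Make a PermuteFactor printable as "(backend-id/layout)" for logging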
+std::ostream &operator<<(std::ostream &os, const onert::compiler::PermuteFactor &obj)
+{
+ assert(obj.backend() && obj.backend()->config());
+ return os << "(" << obj.backend()->config()->id() << "/" << to_string(obj.layout()) << ")";
+}
#include <typeinfo>
#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
-
#include "util/logging.h"
#include "util/Utils.h"
{
ShapeValidator::ShapeValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN}
+ : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN}
{
}
_current_layout = _graph.layout();
_graph.operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
+ [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
void ShapeValidator::visit(const ir::operation::BatchMatMul &node)
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
const auto frontend_layout = _current_layout;
const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
const auto weight_scales_index{
- node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_SCALES)};
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_SCALES)};
const auto weight_binary_index{
- node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_BINARY)};
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_BINARY)};
const auto weight_cluster_index{
- node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
// const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)};
OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 2);
const auto input_binary_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
const auto input_scales_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_SCALES)};
const auto input_clusters_index{
- node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+ node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
OP_REQUIRES(_ctx.at(indices_index).shape().rank() <= 2); // TODO : support rank up to 4 or more
OP_REQUIRES(_ctx.at(input_binary_index).shape().rank() == 2);
{
// Reducing C or
// (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1)
- OP_REQUIRES((input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(1) == output_shape.dim(1) &&
- input_shape.dim(2) == output_shape.dim(2)) ||
- (input_shape.dim(0) == output_shape.dim(0) &&
- (input_shape.dim(1) == output_shape.dim(1) ||
- input_shape.dim(2) == output_shape.dim(1)) &&
- input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
+ OP_REQUIRES(
+ (input_shape.dim(0) == output_shape.dim(0) && input_shape.dim(1) == output_shape.dim(1) &&
+ input_shape.dim(2) == output_shape.dim(2)) ||
+ (input_shape.dim(0) == output_shape.dim(0) &&
+ (input_shape.dim(1) == output_shape.dim(1) || input_shape.dim(2) == output_shape.dim(1)) &&
+ input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
}
}
}
return;
const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
const auto frontend_layout = _current_layout;
return;
const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; // Optional
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; // Optional
const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; // Optional
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; // Optional
const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; // Optional
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; // Optional
const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // Optional
const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // Optional
const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // Optional
const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // Optional
const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // Optional
const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; // Optional
const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // Optional
const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // Optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // Optional
const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
OP_REQUIRES(_ctx.at(input_index).shape().dim(i) == _ctx.at(output_index).shape().dim(i));
}
OP_REQUIRES(
- (_ctx.at(output_index).shape().rank() == 2 || _ctx.at(output_index).shape().rank() == 3) &&
- (_ctx.at(input_index).shape().rank() == 2 || _ctx.at(input_index).shape().rank() == 3) &&
- (!_ctx.exist(input_to_input_weights_index) ||
- _ctx.at(input_to_input_weights_index).shape().rank() == 2) &&
- _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
- (!_ctx.exist(recurrent_to_input_weights_index) ||
- _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
- _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
- (!_ctx.exist(projection_weights_index) ||
- _ctx.at(projection_weights_index).shape().rank() == 2) &&
- _ctx.at(output_state_in_index).shape().rank() == 2 &&
- _ctx.at(cell_state_in_index).shape().rank() == 2);
+ (_ctx.at(output_index).shape().rank() == 2 || _ctx.at(output_index).shape().rank() == 3) &&
+ (_ctx.at(input_index).shape().rank() == 2 || _ctx.at(input_index).shape().rank() == 3) &&
+ (!_ctx.exist(input_to_input_weights_index) ||
+ _ctx.at(input_to_input_weights_index).shape().rank() == 2) &&
+ _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
+ _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
+ _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
+ (!_ctx.exist(recurrent_to_input_weights_index) ||
+ _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
+ _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
+ (!_ctx.exist(projection_weights_index) ||
+ _ctx.at(projection_weights_index).shape().rank() == 2) &&
+ _ctx.at(output_state_in_index).shape().rank() == 2 &&
+ _ctx.at(cell_state_in_index).shape().rank() == 2);
OP_REQUIRES(
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(cell_to_forget_weights_index) ||
- _ctx.at(cell_to_forget_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(cell_to_output_weights_index) ||
- _ctx.at(cell_to_output_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().rank() == 1) &&
- _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(cell_bias_index).shape().rank() == 1 &&
- _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
- (!_ctx.exist(projection_bias_index) || _ctx.at(projection_bias_index).shape().rank() == 1));
+ (!_ctx.exist(cell_to_input_weights_index) ||
+ _ctx.at(cell_to_input_weights_index).shape().rank() == 1) &&
+ (!_ctx.exist(cell_to_forget_weights_index) ||
+ _ctx.at(cell_to_forget_weights_index).shape().rank() == 1) &&
+ (!_ctx.exist(cell_to_output_weights_index) ||
+ _ctx.at(cell_to_output_weights_index).shape().rank() == 1) &&
+ (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().rank() == 1) &&
+ _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
+ _ctx.at(cell_bias_index).shape().rank() == 1 &&
+ _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
+ (!_ctx.exist(projection_bias_index) || _ctx.at(projection_bias_index).shape().rank() == 1));
// CIFG assertion
OP_REQUIRES(
- ((!_ctx.exist(input_to_input_weights_index) ||
- (_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
- (!_ctx.exist(recurrent_to_input_weights_index) ||
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
- (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().dim(0) == 0) &&
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
- ((_ctx.exist(input_to_input_weights_index) &&
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
- (_ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
- (_ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0)));
+ ((!_ctx.exist(input_to_input_weights_index) ||
+ (_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!_ctx.exist(recurrent_to_input_weights_index) ||
+ (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().dim(0) == 0) &&
+ (!_ctx.exist(cell_to_input_weights_index) ||
+ _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
+ ((_ctx.exist(input_to_input_weights_index) &&
+ (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
+ (_ctx.exist(recurrent_to_input_weights_index) &&
+ (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
+ (_ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0)));
// Peephole assertion
OP_REQUIRES(((!_ctx.exist(cell_to_forget_weights_index) ||
(_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
_ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
bool has_recurrent_to_input_weights =
- _ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
+ _ctx.exist(recurrent_to_input_weights_index) &&
+ (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
bool has_input_gate_bias =
- _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
+ _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
bool has_cell_to_input_weights = _ctx.exist(cell_to_input_weights_index) &&
_ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
(_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
_ctx.at(projection_weights_index).shape().dim(1) != 0);
bool has_projection_bias =
- _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
+ _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
// NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
// true: no CIFG
bool has_projection_param = has_projection_weights;
const auto batch_size = (_ctx.at(input_index).shape().rank() == 3 && node.param().time_major)
- ? _ctx.at(input_index).shape().dim(1)
- : _ctx.at(input_index).shape().dim(0);
+ ? _ctx.at(input_index).shape().dim(1)
+ : _ctx.at(input_index).shape().dim(0);
OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
batch_size == _ctx.at(cell_state_in_index).shape().dim(0));
num_units == _ctx.at(cell_state_in_index).shape().dim(1));
const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
+ _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
const auto num_lower_index{
- node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)};
+ node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)};
const auto num_upper_index{
- node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
+ node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
// Check for dimension constraints
if (_ctx.at(output_index).info().isDynamic())
namespace compiler
{
-bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
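+// Infer static shapes for all operations of the given subgraph in topological order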
+void StaticShapeInferer::inferSubgraph(ir::SubgraphIndex subg_ind)
+{
+ StaticShapeInferer inferer(subg_ind, _lowered_subgs);
+ auto &lgraph = _lowered_subgs.at(subg_ind);
+ for (auto op_ind : lgraph->graph().topolSortOperations())
+ {
+ auto &op = lgraph->graph().operations().at(op_ind);
+ bool has_dynamic_tensor = inferer.infer(op);
+ lgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor);
+ }
+}
+
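+// Infer the output shapes of a single operation; returns true if any involved tensor is dynamic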
+bool StaticShapeInferer::infer(const ir::Operation &op)
{
bool has_dynamic_tensor = false;
- for (const auto &operation_idx : op_seq.operations())
- {
- auto &op = _operations.at(operation_idx);
- auto opcode = op.opcode();
+ auto opcode = op.opcode();
+
+ _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
- _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
+ // IF: need shape inference for then, else
+ // While: need shape inference for condition, body
+ if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+ {
+ op.accept(*this);
+ }
+ else
+ {
+ _return_has_dynamic_tensor = checkDynamicInput(op);
- // IF: need shape inference for then, else
- // While: need shape inference for condition, body
- if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+ if (_return_has_dynamic_tensor)
{
- op.accept(*this);
+ setDynamicOutput(op);
}
else
{
- _return_has_dynamic_tensor = checkDynamicInput(op);
-
- if (_return_has_dynamic_tensor)
- {
- setDynamicOutput(op);
- }
- else
- {
- op.accept(*this);
- }
+ op.accept(*this);
}
-
- has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
}
+ has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
+
return has_dynamic_tensor;
}
{
const auto index = pair.first;
const auto &lowered_subg = pair.second;
- VERBOSE(StaticShapeInferer) << "SubGraph #" << index.value() << std::endl;
+ VERBOSE(StaticShapeInferer) << index << std::endl;
lowered_subg->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
- VERBOSE(StaticShapeInferer) << "Operand #" << ind.value() << ", "
- << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
- << get_shape_str(operand.info().shape()) << std::endl;
- });
+ [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ VERBOSE(StaticShapeInferer)
+ << " " << ind << ", " << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
+ << get_shape_str(operand.info().shape()) << std::endl;
+ });
}
}
// re-sizing output shape
ir::Shape new_shape =
- shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank);
+ shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank);
output.info().shape(new_shape);
}
const auto &input = _operands.at(input_idx);
const auto cluster_idx{
- op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
const auto &cluster = _operands.at(cluster_idx);
const auto output_idx = op.getOutputs().at(0);
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferBCQFullyConnectedShape(
- input.info().shape(), cluster.info().shape(), cluster_buf);
+ input.info().shape(), cluster.info().shape(), cluster_buf);
output.info().shape(new_shape);
}
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferBCQGatherShape(
- indices.info().shape(), cluster.info().shape(), cluster_buf, rank, op.param());
+ indices.info().shape(), cluster.info().shape(), cluster_buf, rank, op.param());
output.info().shape(new_shape);
}
// re-sizing output shape
ir::Shape new_shape =
- shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
+ shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
output.info().shape(new_shape);
}
assert(axis.data()->base());
int32_t axis_value =
- (axis_type == ir::DataType::INT32)
- ? reinterpret_cast<const int32_t *>(axis.data()->base())[0]
- : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]);
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis.data()->base())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]);
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_value);
const auto &dims_shape = shape.info().shape();
auto new_shape = ((dims_type == ir::DataType::INT32)
- ? shape_inference::inferFillShape<int32_t>(
- dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
- : shape_inference::inferFillShape<int64_t>(
- dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
output.info().shape(new_shape);
}
ir::Operand &output = _operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape =
- shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
+ shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
output.info().shape(new_shape);
}
// re-sizing output shape
ir::Shape new_shape =
- shape_inference::inferGatherShape(input.info().shape(), indices.info().shape(), axis, rank);
+ shape_inference::inferGatherShape(input.info().shape(), indices.info().shape(), axis, rank);
output.info().shape(new_shape);
}
}
}
- // re-sizing operands of then subgraph
- StaticShapeInferer then_inferer(op.param().then_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().then_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = then_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
-
- // re-sizing operands of else subgraph
- StaticShapeInferer else_inferer(op.param().else_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().else_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = else_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
+ inferSubgraph(op.param().then_subg_index);
+ inferSubgraph(op.param().else_subg_index);
// re-sizing output shapes
+ // TODO use then_graph / else_graph instead
const auto &then_outputs = _lowered_subgs.at(op.param().then_subg_index)->graph().getOutputs();
const auto &else_outputs = _lowered_subgs.at(op.param().else_subg_index)->graph().getOutputs();
assert(outputs.size() == then_outputs.size());
auto &output = _operands.at(output_index);
const auto output_state_out_index{
- op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- if (output.info().isDynamic() || (_operands.exist(output_state_out_index) &&
- _operands.at(output_state_out_index).info().isDynamic()) ||
+ if (output.info().isDynamic() ||
+ (_operands.exist(output_state_out_index) &&
+ _operands.at(output_state_out_index).info().isDynamic()) ||
(_operands.exist(cell_state_out_index) &&
_operands.at(cell_state_out_index).info().isDynamic()) ||
(_operands.exist(scratch_buffer_index) &&
const auto &input = _operands.at(input_index);
const auto input_to_output_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto &input_to_output_weights = _operands.at(input_to_output_weights_index);
const auto recurrent_to_output_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto &recurrent_to_output_weights = _operands.at(recurrent_to_output_weights_index);
// re-sizing outputs
auto &scratch_buffer = _operands.at(scratch_buffer_index);
const auto input_to_input_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
bool has_input_to_input_weights =
- _operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ _operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _operands.at(input_to_input_weights_index).shape().dim(1) != 0;
bool has_recurrent_to_input_weights =
- _operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ _operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
// NOTE The cell_to_input_weights do not exist in non-peephole mode, even for a regular (non-CIFG) LSTM.
// true: no CIFG
// re-sizing output shape
const auto new_shape = shape_inference::inferPadShape(
- input.shape(), reinterpret_cast<const int32_t *>(pad.data()->base()),
- pad.shape().num_elements());
+ input.shape(), reinterpret_cast<const int32_t *>(pad.data()->base()),
+ pad.shape().num_elements());
output.info().shape(new_shape);
}
if (output.typeInfo().type() == ir::DataType::FLOAT32)
{
new_shape = shape_inference::inferRangeShape<float>(
- start_op.asScalar<float>(), limit_op.asScalar<float>(), delta_op.asScalar<float>());
+ start_op.asScalar<float>(), limit_op.asScalar<float>(), delta_op.asScalar<float>());
}
else if (output.typeInfo().type() == ir::DataType::INT32)
{
new_shape = shape_inference::inferRangeShape<int32_t>(
- start_op.asScalar<int32_t>(), limit_op.asScalar<int32_t>(), delta_op.asScalar<int32_t>());
+ start_op.asScalar<int32_t>(), limit_op.asScalar<int32_t>(), delta_op.asScalar<int32_t>());
}
assert(output.shape() == new_shape);
}
// re-sizing output shape
ir::Shape new_shape =
- shape_inference::inferReduceShape(input.info().shape(), axes_vec, keep_dims);
+ shape_inference::inferReduceShape(input.info().shape(), axes_vec, keep_dims);
output.info().shape(new_shape);
}
assert(shape_buf);
ir::Shape new_shape = shape_inference::inferReshapeShape(
- shape_buf, shape.shape().num_elements(), input.shape().num_elements());
+ shape_buf, shape.shape().num_elements(), input.shape().num_elements());
// If shape comes from a Const, TFLC puts the output shape into the tensor
if (new_shape != output.shape())
{
// Let's check the new_shape option
auto shape = op.param().new_shape;
- ir::Shape new_shape = shape_inference::inferReshapeShape(shape.data(), shape.size(),
- input.shape().num_elements());
+ ir::Shape new_shape =
+ shape_inference::inferReshapeShape(shape.data(), shape.size(), input.shape().num_elements());
if (new_shape != output.shape())
{
// Shape inferencing logic based on Params
ir::Shape new_shape =
- shape_inference::inferResizeBilinearShape(input.shape(), height_out, width_out);
+ shape_inference::inferResizeBilinearShape(input.shape(), height_out, width_out);
// If size_op comes from a Const, TFLC puts the output shape into the tensor
if (new_shape != output.shape())
// Select output shape
ir::Shape new_shape = shape_inference::inferSelectShape(
- input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
+ input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
output.info().shape(new_shape);
}
return;
}
- auto begins_buf = reinterpret_cast<const int32_t *>(begins.data()->base());
- auto sizes_buf = reinterpret_cast<const int32_t *>(sizes.data()->base());
+ auto begins_buf = begins.data()->base();
+ auto sizes_buf = sizes.data()->base();
+
+ const auto begins_type = begins.typeInfo().type();
+ assert(begins_type == ir::DataType::INT32 || begins_type == ir::DataType::INT64);
+ assert(begins_type == sizes.typeInfo().type());
ir::Shape new_shape =
- shape_inference::inferSliceShape(input.info().shape(), begins_buf, sizes_buf);
+ (begins_type == ir::DataType::INT32)
+ ? shape_inference::inferSliceShape<int32_t>(input.info().shape(),
+ reinterpret_cast<const int32_t *>(begins_buf),
+ reinterpret_cast<const int32_t *>(sizes_buf))
+ : shape_inference::inferSliceShape<int64_t>(input.info().shape(),
+ reinterpret_cast<const int64_t *>(begins_buf),
+ reinterpret_cast<const int64_t *>(sizes_buf));
output.info().shape(new_shape);
}
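The rewritten Slice branch above dispatches on the element type of the begins/sizes operands instead of assuming INT32. Below is a minimal standalone sketch of that dtype-dispatch idea in plain C++; the type and function names are illustrative only and are not onert API.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

enum class DType { INT32, INT64 };

// Read a raw index buffer as either int32 or int64 and normalize to int64.
template <typename T> std::vector<int64_t> readAs(const void *buf, std::size_t count)
{
  const T *typed = reinterpret_cast<const T *>(buf);
  return std::vector<int64_t>(typed, typed + count);
}

std::vector<int64_t> readIndexBuffer(const void *buf, std::size_t count, DType dtype)
{
  return (dtype == DType::INT32) ? readAs<int32_t>(buf, count) : readAs<int64_t>(buf, count);
}

int main()
{
  int32_t begins[] = {0, 1, 2};
  for (auto v : readIndexBuffer(begins, 3, DType::INT32))
    std::cout << v << " "; // prints: 0 1 2
  std::cout << std::endl;
}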
auto padding_data = reinterpret_cast<const int32_t *>(padding.data()->base());
ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
- input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
+ input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
output.info().shape(new_shape);
}
assert(0 <= axis_value && axis_value < rank);
ir::Shape new_shape =
- shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits);
+ shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits);
for (auto output_idx : outputs)
{
ir::Operand &output = _operands.at(output_idx);
auto strides_buf = reinterpret_cast<const uint32_t *>(strides.data()->base());
auto op_params = shape_inference::buildStridedSliceParams(
- starts_buf, ends_buf, strides_buf, begin_mask, end_mask, shrink_axis_mask, rank);
+ starts_buf, ends_buf, strides_buf, begin_mask, end_mask, shrink_axis_mask, rank);
ir::Shape new_shape =
- shape_inference::inferStridedSliceShape(input.info().shape(), op_params, rank);
+ shape_inference::inferStridedSliceShape(input.info().shape(), op_params, rank);
output.info().shape(new_shape);
}
}
// re-sizing operands of body subgraph
- StaticShapeInferer body_inferer(op.param().body_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().body_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = body_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
+ inferSubgraph(op.param().body_subg_index);
// Check whether while operation's shapes are predictable
// If any shapes of body outputs and cond inputs differ, non-constant operands would be
}
// Set non-constant operands of body subgraph to dynamic
- StaticShapeInferer body_inferer(op.param().body_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().body_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = body_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
+ inferSubgraph(op.param().body_subg_index);
}
// re-sizing operands of cond subgraph
// If check_unpredictable_dynamic is true, non-constant operands of cond subgraph would be set to
// dynamic
- StaticShapeInferer cond_inferer(op.param().cond_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().cond_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = cond_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
+ inferSubgraph(op.param().cond_subg_index);
// re-sizing outputs of while operation
// If check_unpredictable_dynamic is true, outputs of while operation would be set to dynamic
#include <memory>
#include "backend/BackendContext.h"
#include "backend/Backend.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/TensorBuilder.h"
-#include "backend/controlflow/TensorRegistry.h"
+#include "backend/builtin/Config.h"
+#include "backend/builtin/TensorBuilder.h"
+#include "backend/builtin/TensorRegistry.h"
namespace onert
{
public:
TensorRegistries() = default;
- TensorRegistries(const onert::backend::BackendContexts &backend_contexts,
- bool include_controlflow)
+ TensorRegistries(const onert::backend::BackendContexts &backend_contexts, bool include_builtin)
{
for (const auto &e : backend_contexts)
{
auto tensor_reg = e.second->tensor_registry;
- if (e.first->config()->id() == backend::controlflow::Config::ID)
+ if (e.first->config()->id() == backend::builtin::Config::ID)
{
- _cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg);
- if (include_controlflow)
+ _builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(tensor_reg);
+ if (include_builtin)
_tensor_regs.insert(tensor_reg);
}
else
return _tensor_regs.cend();
}
- std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const
+ std::shared_ptr<backend::builtin::TensorRegistry> getBuiltinTensorRegistry() const
{
- return _cf_tensor_reg;
+ return _builtin_tensor_reg;
}
backend::ITensor *getITensor(ir::OperandIndex ind) const
private:
std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs;
- std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg;
+ std::shared_ptr<backend::builtin::TensorRegistry> _builtin_tensor_reg;
};
} // namespace compiler
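For context, an aggregated registry like the one above is typically queried by probing each backend registry in turn until one owns the operand. The following is a standalone sketch of that lookup pattern with illustrative types only; it is an assumption about the shape of getITensor(), not the actual onert implementation.

#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>

struct Tensor
{
  std::string name;
};

struct Registry
{
  std::unordered_map<int, Tensor> tensors;
  Tensor *find(int ind)
  {
    auto it = tensors.find(ind);
    return (it == tensors.end()) ? nullptr : &it->second;
  }
};

int main()
{
  auto cpu_reg = std::make_shared<Registry>();
  cpu_reg->tensors[0] = Tensor{"cpu_tensor_0"};
  auto builtin_reg = std::make_shared<Registry>();
  builtin_reg->tensors[1] = Tensor{"builtin_tensor_1"};

  // Probe every backend registry until one owns the operand index.
  std::unordered_set<std::shared_ptr<Registry>> regs{cpu_reg, builtin_reg};
  for (int ind : {0, 1})
    for (const auto &reg : regs)
      if (auto *t = reg->find(ind))
      {
        std::cout << "Operand " << ind << " -> " << t->name << std::endl;
        break;
      }
}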
#include "ConstantInsertionPass.h"
#include "backend/Backend.h"
-#include <ir/Graph.h>
-#include <util/Utils.h>
+#include "ir/Graph.h"
+#include "util/Utils.h"
+#include "util/logging.h"
namespace onert
{
void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
{
- const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
- const auto backend = op_seq_lower_info->backend();
- const auto layout = op_seq_lower_info->layout();
- const auto factor = ir::operand::PermuteFactor{backend, layout};
+ const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index);
+ const auto backend = op_lower_info->backend();
+ const auto layout = op_lower_info->layout();
+ const auto factor = PermuteFactor{backend, layout};
for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
if (_replace_operands_map.count(key) == 0)
{
ir::Operand new_object(object);
- new_object.unsetDef();
- // TODO Remove const_case
- const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear();
+ new_object.clearDefUse();
const auto new_index = _graph.operands().emplace(new_object);
_replace_operands_map[key] = new_index;
}
const auto replaced_input = _replace_operands_map[key];
- // Update op_seq
- if (_lowered_graph.op_seqs().at(op_sequence_index).getInputs().contains(input))
- {
- // All inputs of op_seq have the same PermuteFactor because those inputs are inputs of first
- // operation
- _lowered_graph.op_seqs().at(op_sequence_index).replaceInputs(input, replaced_input);
- }
// Update the same inputs of a node at once because inputs of an operation have the same
// PermuteFactor
auto &replaced_object = _graph.operands().at(replaced_input);
replaced_object.insertUse(node_index);
+ VERBOSE(ConstInsertPass) << "New operand " << replaced_input << " added(copy of " << input
+ << ") for " << factor << std::endl;
// Remove this node from uses of origin operand
// Constant operand has no def.
assert(!object.getDef().valid());
// Remove origin operand
if (object.getUses().size() == 0)
+ {
_graph.removeOperand(input);
+ VERBOSE(ConstInsertPass) << "Original operand " << input << " removed - no uses"
+ << std::endl;
+ }
}
}
#ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
#define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
-#include <ir/operand/PermuteFactor.h>
+#include <compiler/PermuteFactor.h>
#include <ir/Index.h>
#include "LoweredOperationPass.h"
#include <unordered_map>
struct ReplaceKey
{
ir::OperandIndex index;
- ir::operand::PermuteFactor factor;
+ PermuteFactor factor;
bool operator==(const ReplaceKey &other) const
{
std::size_t operator()(const ReplaceKey &key) const noexcept
{
using std::hash;
- return hash<ir::OperandIndex>()(key.index) ^
- (hash<ir::operand::PermuteFactor>()(key.factor) << 1);
+ return hash<ir::OperandIndex>()(key.index) ^ (hash<PermuteFactor>()(key.factor) << 1);
}
};
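The ReplaceKey hasher above combines the operand-index hash with a shifted PermuteFactor hash. Here is a self-contained sketch of that XOR-and-shift combine pattern; the member types are illustrative stand-ins.

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>

struct Key
{
  int index;
  std::string factor;
  bool operator==(const Key &other) const { return index == other.index && factor == other.factor; }
};

struct KeyHash
{
  std::size_t operator()(const Key &key) const noexcept
  {
    // Shifting the second hash before XOR reduces collisions when both members hash equally.
    return std::hash<int>()(key.index) ^ (std::hash<std::string>()(key.factor) << 1);
  }
};

int main()
{
  std::cout << KeyHash{}(Key{3, "cpu/NHWC"}) << std::endl;
}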
#include "backend/Backend.h"
#include <ir/Graph.h>
-#include <ir/operand/PermuteFactor.h>
+#include <compiler/PermuteFactor.h>
#include <util/Utils.h>
+#include "util/logging.h"
namespace onert
{
void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
{
- const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
- const auto backend = op_seq_lower_info->backend();
- const auto layout = op_seq_lower_info->layout();
- const auto factor = ir::operand::PermuteFactor{backend, layout};
+ const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index);
+ const auto backend = op_lower_info->backend();
+ const auto layout = op_lower_info->layout();
+ const auto factor = PermuteFactor{backend, layout};
// Currently this runtime does not support a node that makes an operation output a constant
for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
// All constant operands are already assigned to each backend by ConstantInsertionPass. So a
// constant has `def` and `use` as the same PermuteFactor
- _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>());
- _lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor);
- _lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor);
+ auto operand_li = std::make_unique<compiler::OperandLowerInfo>();
+ operand_li->addDefPermuteFactor(factor);
+ operand_li->addUsePermuteFactor(factor);
+ _lowered_graph.lower_info().operand.set(input, std::move(operand_li));
}
}
}
{
public:
LoweredOperandPass(compiler::LoweredGraph &lowered_graph)
- : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
+ : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
}
{
public:
LoweredOperationPass(LoweredGraph &lowered_graph)
- : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
+ : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
}
void OperandPass::run()
{
_graph.operands().iterate(
- [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
+ [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
}
} // namespace pass
void OperationPass::run()
{
_graph.operations().iterate(
- [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); });
+ [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); });
}
} // namespace pass
namespace ir
{
class Graph;
-} // namespace compiler
+} // namespace ir
} // namespace onert
namespace onert
VERBOSE(PassRunner) << "Start running '" << pass->id() << "'" << std::endl;
pass->run();
VERBOSE(PassRunner) << "Finished running '" << pass->id() << "'" << std::endl;
- // TODO Dump graph(LowerInfo, OpSequence, ...)?
+ // TODO Dump graph?
}
}
*/
#include "PermutationEliminationPass.h"
-#include "backend/controlflow/Config.h"
+#include "backend/builtin/Config.h"
#include "util/logging.h"
// Check if two tensors are both portable; if not, we can't eliminate the node
{
- auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement();
- auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement();
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ auto in_def_factor = operand_li_map.getRawPtr(in_operand)->def_factors().getOnlyElement();
+ auto out_def_factor = operand_li_map.getRawPtr(out_operand)->def_factors().getOnlyElement();
auto in_config = in_def_factor.backend()->config();
auto out_config = out_def_factor.backend()->config();
auto &out_operand_obj = _graph.operands().at(out_operand);
assert(out_operand_obj.getDef() == _op_ind);
out_operand_obj.unsetDef();
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getOutputs().contains(in_operand))
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::Operation &op) {
+ if (!op.getOutputs().contains(in_operand))
return;
-
- // Update OpSequence/ir::Operation edges and ir::Operand edges
- op_seq.replaceOutputs(in_operand, out_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getOutputs().contains(in_operand))
- {
- operation_obj.replaceOutputs(in_operand, out_operand);
- out_operand_obj.setDef(op);
- }
- }
+ // Update Operation and Operand edges
+ op.replaceOutputs(in_operand, out_operand);
+ out_operand_obj.setDef(op_ind);
});
- // Remove Permute operation, enclosing OpSequence and the operand
+ // Remove Permute operation and the operand
{
_graph.removeOperand(in_operand);
-
- auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
- // Assumes enclosing OpSequence contatins just this Permute operation
- assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
- _lowered_graph.op_seqs().remove(op_seq_ind);
_graph.operations().remove(_op_ind);
}
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getInputs().contains(in_operand))
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::Operation &op) {
+ if (!op.getInputs().contains(in_operand))
return;
-
- op_seq.replaceInputs(in_operand, out_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getInputs().contains(in_operand))
- {
- operation_obj.replaceInputs(in_operand, out_operand);
- out_operand_obj.insertUse(op);
- }
- }
+ op.replaceInputs(in_operand, out_operand);
+ out_operand_obj.insertUse(op_ind);
});
VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (removed) ir::Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(kept) ir::Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(kept) Operand : " << out_operand << std::endl;
}
else
{
auto &in_operand_obj = _graph.operands().at(in_operand);
in_operand_obj.removeUse(_op_ind);
- // Make OpSequences(that use the output) use the input
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getInputs().contains(out_operand))
+ // Make operations(that use the output) use the input
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::Operation &op) {
+ if (!op.getInputs().contains(out_operand))
return;
-
- op_seq.replaceInputs(out_operand, in_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getInputs().contains(out_operand))
- {
- operation_obj.replaceInputs(out_operand, in_operand);
- in_operand_obj.insertUse(op);
- }
- }
+ op.replaceInputs(out_operand, in_operand);
+ in_operand_obj.insertUse(op_ind);
});
- // Remove Permute operation, enclosing OpSequence and the operand
+ // Remove the Permute operation and out_operand
{
_graph.removeOperand(out_operand);
-
- auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
- // Assumes enclosing OpSequence contatins just this Permute operation
- assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
- _lowered_graph.op_seqs().remove(op_seq_ind);
_graph.operations().remove(_op_ind);
}
- VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (kept) ir::Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(removed) ir::Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << "Permute Op removed : " << _op_ind << std::endl;
+ VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(removed) Operand : " << out_operand << std::endl;
}
}
* are compatible and layouts match.
*
* Permute input tensor is kept and the output is removed for all the cases, except model outputs.
- * As all output tensors have to be controlflow backend, so the output is kept.
+ * Since all model output tensors have to be on the builtin backend, the output is kept.
*
* @note This is an optimization pass which means that everything should work fine even if this pass
* was skipped.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
#include <utility>
#include <unordered_map>
-#include "backend/controlflow/Config.h"
+#include "backend/builtin/Config.h"
#include "ir/Operand.h"
-#include "ir/operation/LowerInfo.h"
+#include "compiler/OperationLowerInfo.h"
#include "ir/Graph.h"
#include "backend/IConfig.h"
#include "util/logging.h"
void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object)
{
- auto &&operand_li = _lowered_graph.getLowerInfo(index);
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ auto &&operand_li = operand_li_map.getRawPtr(index);
assert(operand_li);
// NOTE Later, constants also will have Def
std::list<ir::OperationIndex> permute_indexes;
// Build a map for all necessary type of operands
- std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index;
+ std::unordered_map<PermuteFactor, ir::OperandIndex> factor_to_index;
{
assert(operand_li->def_factors().size() == 1);
for (auto factor : operand_li->def_factors())
continue;
auto &operation = _graph.operations().at(use);
- assert(_lowered_graph.op_seqs().containsOperation(use));
- auto op_seq_index = _lowered_graph.op_seqs().getOperation(use);
- auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- assert(op_seq_li);
- const auto op_seq_layout = op_seq_li->layout();
- const backend::Backend *backend = op_seq_li->backend();
+ auto op_li = _lowered_graph.lower_info().operation.getRawPtr(use);
+ assert(op_li);
+ const auto op_layout = op_li->layout();
+ const backend::Backend *backend = op_li->backend();
assert(backend);
auto use_node_inputs = operation.getInputs();
assert(use_node_inputs.contains(index));
- auto new_index = factor_to_index.at({backend, op_seq_layout});
+ auto new_index = factor_to_index.at({backend, op_layout});
if (index != new_index)
{
- // Update from op_seq
- // Replace the same inputs of an OpSequence at once for the following reasons:
- // 1. An OpSequence's inputs are the same inputs of first operation
- // 2. An OpSequence may have inputs as the same operand (2 or more).
- // 3. The same inputs of OpSequence have the same PermuteFactor.
- _lowered_graph.op_seqs().at(op_seq_index).replaceInputs(index, new_index);
-
// Update from operation
// Replace the same inputs of an operation at once for the following reasons:
// No. 2 and 3 above
// Update from operand
remove_list.push_back(
- use); // Removal should be done in another loop since we are in the loop
+ use); // Removal should be done in another loop since we are in the loop
_graph.operands().at(new_index).insertUse(use);
}
}
}
ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index,
- const ir::operand::PermuteFactor &factor)
+ const PermuteFactor &factor)
{
- assert(!_graph.isBuildingPhase());
-
auto &operand = _graph.operands().at(operand_index);
// Generate output operand and permute operation
auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo());
- // change model output if operand_index is model output index and the out operand is controlflow
+ // change model output if operand_index is model output index and the out operand is builtin
// backend
auto &model_outputs = _graph.getOutputs();
- const backend::Backend *cf_backend = compiler::BackendManager::get().getControlflow();
- if (model_outputs.contains(operand_index) && factor.backend() == cf_backend)
+ const backend::Backend *builtin_backend = compiler::BackendManager::get().getBuiltin();
+ if (model_outputs.contains(operand_index) && factor.backend() == builtin_backend)
{
model_outputs.replace(operand_index, out_operand_index);
}
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+
// Find Permute information
- auto input_factor = _lowered_graph.getLowerInfo(operand_index)->def_factors().getOnlyElement();
+ auto input_factor = operand_li_map.getRawPtr(operand_index)->def_factors().getOnlyElement();
auto input_backend = input_factor.backend();
auto output_backend = factor.backend();
// NOTE Permute may not have specific layout because the layout of input and output may be
// different.
const auto permute_node_layout = ir::Layout::UNKNOWN;
// NOTE If one backend supports several layout, the backend must support Permute operation
- const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow();
+ const backend::Backend *permute_node_backend = compiler::BackendManager::get().getBuiltin();
if (input_backend == output_backend)
{
permute_node_backend = input_backend;
}
- const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
+ const PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
// Update LowerInfo of input operand
- auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index);
+ auto operand_lower_info = operand_li_map.getRawPtr(operand_index);
operand_lower_info->removeUsePermuteFactor(factor);
operand_lower_info->addUsePermuteFactor(permute_node_factor);
// Update LowerInfo of output operand
- auto out_operand_li = std::make_unique<ir::operand::LowerInfo>();
+ auto out_operand_li = std::make_unique<compiler::OperandLowerInfo>();
// The input and output factors of all nodes will be the same except Permute. So Tensor's
// allocators allocate memory using only the information of the def permutation factor for now.
// TODO Change param to permute_node_factor
out_operand_li->addDefPermuteFactor(factor);
out_operand_li->addUsePermuteFactor(factor);
- _lowered_graph.setLowerInfo(out_operand_index, std::move(out_operand_li));
+ operand_li_map.set(out_operand_index, std::move(out_operand_li));
// Insert permute operation to the graph
const auto input_layout = input_factor.layout();
auto insert_node = std::make_unique<Permute>(operand_index, out_operand_index, permute_type);
auto node_index = _graph.operations().push(std::move(insert_node));
- const auto &node = _graph.operations().at(node_index);
VERBOSE_F() << "Permute Op inserted, node index : " << node_index << std::endl;
VERBOSE_F() << " - Input (original) Operand : " << operand_index << "("
VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << "("
<< factor.backend()->config()->id() << ")" << std::endl;
- // OpSequence
+ // Operation LowerInfo
{
- auto op_seq_index = _lowered_graph.op_seqs().emplace(node_index, permute_node_layout);
- auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- op_seq.setInputs(node.getInputs());
- op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>(
- permute_node_backend, permute_node_layout));
+ auto &operation_li_map = _lowered_graph.lower_info().operation;
+ operation_li_map.set(node_index, std::make_unique<compiler::OperationLowerInfo>(
+ permute_node_backend, permute_node_layout));
}
// Update Use/Def info
#include "LoweredOperandPass.h"
#include "compiler/BackendManager.h"
#include "ir/Operand.h"
-#include "ir/operand/PermuteFactor.h"
+#include "compiler/PermuteFactor.h"
namespace onert
{
* @return ir::OperationIndex
*/
ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index,
- const ir::operand::PermuteFactor &factor);
+ const PermuteFactor &factor);
};
} // namespace pass
void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
{
node.accept(*this);
-};
+}
// TODO Remove this. Expanding ranks of Operand is dangerous
void PermutationOperationPass::applyExpandRanks(const Operation &node)
assert(output.getDef().valid());
const auto node_index = output.getDef();
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
- const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();
+ const auto frontend_layout = _graph.layout();
+ const auto backend_layout = _lowered_graph.lower_info().operation.getRawPtr(node_index)->layout();
if (frontend_layout == backend_layout)
{
assert(output_obj.getDef().valid());
const auto node_index = output_obj.getDef();
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
- const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();
+ auto &operation_li_map = _lowered_graph.lower_info().operation;
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ const auto frontend_layout = _graph.layout();
+ const auto backend_layout = operation_li_map.getRawPtr(node_index)->layout();
if (frontend_layout == backend_layout)
{
// Permutation changing layout beyond 4-D is not supported yet
assert(output_obj.shape().rank() <= 4);
- // Divide op_seq based on target operation
+ // Change PermuteFactors of operands and the operation of target node
{
- auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- auto &operations = _lowered_graph.graph().operations();
-
- // Create new op_seq and move information from existing op_seq to new op_seq if target
- // node is the end of op_seq
- auto it = prev_op_seq.begin();
- // Find iterator of target node in op_seq
- while (*(it++) != node_index)
- ;
- if (it != prev_op_seq.end())
- {
- const auto &target_op_idx = *it;
- const auto &target_node = operations.at(target_op_idx);
- const auto &next_op_seq_index =
- _lowered_graph.op_seqs().emplace(target_op_idx, prev_op_seq.getLayout());
- auto &next_op_seq = _lowered_graph.op_seqs().at(next_op_seq_index);
- next_op_seq.setInputs(target_node.getInputs());
- next_op_seq.setOutputs(target_node.getOutputs());
-
- std::vector<OperationIndex> remove_list;
- remove_list.emplace_back(target_op_idx);
- while (++it != prev_op_seq.end())
- {
- next_op_seq.appendOperation(target_op_idx);
- next_op_seq.setOutputs(target_node.getOutputs());
- remove_list.emplace_back(target_op_idx);
- }
+ const auto op_li = operation_li_map.getRawPtr(node_index);
+ const auto backend = op_li->backend();
- prev_op_seq.setOutputs(node.getOutputs());
- for (const auto &index : remove_list)
- {
- prev_op_seq.remove(index);
- }
-
- const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- _lowered_graph.setLowerInfo(
- next_op_seq_index,
- std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
- }
- }
-
- // Remove target operation from op_seq and insert the target operation to new op_seq
- {
- const auto backend = _lowered_graph.getLowerInfo(op_seq_index)->backend();
+ operation_li_map.set(node_index,
+ std::make_unique<compiler::OperationLowerInfo>(backend, frontend_layout));
- // Remove target operation from op_sequence
- _lowered_graph.op_seqs().removeFromOpSequence(node_index);
-
- if (!_lowered_graph.op_seqs().exist(op_seq_index))
- {
- // Remove lowerinfo for op_seq of target operation if the op_seq does not exist
- _lowered_graph.removeLowerInfo(op_seq_index);
- }
- else
- {
- // Update op_seq of target operation if the op_seq exists
- auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- const auto &last_node_idx = *(--prev_op_seq.end());
- const auto &last_node = _lowered_graph.graph().operations().at(last_node_idx);
- prev_op_seq.setOutputs(last_node.getOutputs());
- }
-
- // Create new op_seq and set information to the op_seq
- auto new_op_seq_index = _lowered_graph.op_seqs().emplace(node_index, frontend_layout);
- auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index);
- new_op_seq.setInputs(node.getInputs());
- new_op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(
- new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout));
- }
-
- // Change PermuteFactors of operands of target node
- {
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- const auto backend = op_seq_li->backend();
- const operand::PermuteFactor removed_factor{backend, backend_layout};
- const operand::PermuteFactor new_factor{backend, frontend_layout};
+ const PermuteFactor removed_factor{backend, backend_layout};
+ const PermuteFactor new_factor{backend, frontend_layout};
for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
+ // The old factor can be removed only if no other operation that uses this operand
+ // runs with the same backend and layout
bool canRemove = true;
for (const auto &use : _graph.operands().at(input).getUses())
{
if (use != node_index)
{
- const auto &use_op_seq_index = _lowered_graph.op_seqs().getOperation(use);
- auto use_op_seq_li = _lowered_graph.getLowerInfo(use_op_seq_index);
- if (use_op_seq_li->backend() == backend && use_op_seq_li->layout() == backend_layout)
+ auto use_op_li = operation_li_map.getRawPtr(use);
+ if (use_op_li->backend() == backend && use_op_li->layout() == backend_layout)
{
canRemove = false;
break;
}
}
- auto lower_info = _lowered_graph.getLowerInfo(input);
+ auto input_li = operand_li_map.getRawPtr(input);
if (canRemove)
{
- lower_info->removeUsePermuteFactor(removed_factor);
+ input_li->removeUsePermuteFactor(removed_factor);
}
- lower_info->addUsePermuteFactor(new_factor);
+ input_li->addUsePermuteFactor(new_factor);
// Check whether the node's input is a model input or a constant
if (!_graph.operands().at(input).getDef().valid() &&
- (lower_info->def_factors().size() == 1 &&
- lower_info->def_factors().getOnlyElement() == removed_factor))
+ (input_li->def_factors().size() == 1 &&
+ input_li->def_factors().getOnlyElement() == removed_factor))
{
assert(_graph.getInputs().contains(input) || _graph.operands().at(input).isConstant());
- lower_info->removeDefPermuteFactor(removed_factor);
- lower_info->addDefPermuteFactor(new_factor);
+ input_li->removeDefPermuteFactor(removed_factor);
+ input_li->addDefPermuteFactor(new_factor);
}
}
for (const auto &output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
- auto lower_info = _lowered_graph.getLowerInfo(output);
+ auto lower_info = operand_li_map.getRawPtr(output);
lower_info->removeDefPermuteFactor(removed_factor);
lower_info->addDefPermuteFactor(new_factor);
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pass.h"
+
+#include "UnusedOperandEliminationPass.h"
+#include "ir/Index.h"
+#include "util/Set.h"
+#include "ir/Graph.h"
+
+/**
+ * @file UnusedOperandEliminationPass.cc
+ * @brief This file contains UnusedOperandEliminationPass class implementation
+ */
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+void UnusedOperandEliminationPass::run()
+{
+ util::Set<ir::OperandIndex> used;
+
+ _graph.operations().iterate([&](const ir::OperationIndex &, const ir::Operation &node) {
+ for (auto ind : (node.getInputs() + node.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ used.add(ind);
+ }
+ });
+
+ // Graph's inputs/outputs are always considered as used
+ for (auto ind : (_graph.getInputs() + _graph.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ used.add(ind);
+ }
+
+ _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (!used.contains(ind))
+ {
+ VERBOSE() << "Remove unused operand " << ind << std::endl;
+ _graph.operands().remove(ind);
+ }
+ });
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file UnusedOperandEliminationPass.h
+ * @brief This file contains UnusedOperandEliminationPass class
+ */
+
+#ifndef __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
+#define __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
+
+#include "Pass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief A pass to eliminate unused operands from the graph
+ *
+ * Remove operands that are not used by any operations, except Graph inputs/outputs.
+ *
+ */
+class UnusedOperandEliminationPass : public Pass
+{
+public:
+ using Pass::Pass;
+
+public:
+ std::string id() override { return "UnusedOperandEliminationPass"; }
+ void run() final;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
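The pass introduced above is a straightforward mark-and-sweep over operand indices: mark everything referenced by an operation or by the graph I/O, then remove the rest. A standalone sketch of the same idea with plain STL containers (small integer indices standing in for ir::OperandIndex):

#include <iostream>
#include <set>
#include <vector>

struct Op
{
  std::vector<int> inputs, outputs;
};

int main()
{
  std::vector<Op> ops = {{{0, 1}, {2}}, {{2}, {3}}};
  std::set<int> all_operands = {0, 1, 2, 3, 4, 5}; // 4 and 5 are referenced by nothing
  std::vector<int> graph_io = {0, 3};

  // Mark: graph inputs/outputs plus every operand an operation touches.
  std::set<int> used(graph_io.begin(), graph_io.end());
  for (const auto &op : ops)
  {
    used.insert(op.inputs.begin(), op.inputs.end());
    used.insert(op.outputs.begin(), op.outputs.end());
  }

  // Sweep: whatever was never marked can be removed.
  for (int ind : all_operands)
    if (used.find(ind) == used.end())
      std::cout << "Remove unused operand " << ind << std::endl; // prints 4 and 5
}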
}
}
-void DotBuilder::addOpSequence(const DotSubgraphInfo &subgraph_info)
-{
- _dot << "subgraph cluster_" << subgraph_info.index().value() << " {\n";
- _dot << " label=\"" << subgraph_info.label() << "\";\n";
- _dot << " style=filled;\n";
- _dot << " color=lightgrey;\n";
- _dot << " ";
- for (auto op : subgraph_info.operations())
- {
- _dot << "operation" << op.value() << "; ";
- }
- for (auto op : subgraph_info.operands())
- {
- _dot << "operand" << op.value() << "; ";
- }
- _dot << "\n";
- _dot << "}\n";
-}
-
void DotBuilder::writeDot(std::ostream &os)
{
os << "digraph D {\n"
#include "OperationNode.h"
#include "OperandNode.h"
-#include "DotSubgraphInfo.h"
using Operation = onert::ir::Operation;
using Object = onert::ir::Operand;
public:
void update(const Node &dotinfo);
- void addOpSequence(const DotSubgraphInfo &subgraph_info);
void writeDot(std::ostream &os);
#include "DotDumper.h"
#include "DotBuilder.h"
-#include "DotSubgraphInfo.h"
-#include "ir/OpSequence.h"
#include "ir/OperationIndexMap.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
else
{
showing_cond =
- !object.isConstant() || (_graph.getInputs() + _graph.getOutputs()).contains(index);
+ !object.isConstant() || (_graph.getInputs() + _graph.getOutputs()).contains(index);
}
if (showing_cond)
{
std::string fillcolor = "";
if (_lowered_graph)
{
- auto lower_info = _lowered_graph->getLowerInfo(index);
+ auto lower_info = _lowered_graph->lower_info().operand.getRawPtr(index);
const auto &def_factors = lower_info->def_factors();
if (def_factors.size() > 0)
{
if (_lowered_graph)
{
- const auto &op_seqs = _lowered_graph->op_seqs();
- op_seqs.iterate([&](const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph->getLowerInfo(index);
- auto fillcolor = backend_to_fillcolor(lower_info->backend());
- std::string label =
- std::to_string(index.value()) + " [" + lower_info->backend()->config()->id() + "]";
- DotSubgraphInfo subgraph_info{index, op_seq, shown_operand_set, _graph.operations()};
- subgraph_info.label(label);
- subgraph_info.fillcolor(fillcolor);
- dot_builder.addOpSequence(subgraph_info);
-
- // Set fillcolor of all operations in the op_seq
- for (const auto &op_idx : op_seq.operations())
+ _graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+ const auto lower_info = _lowered_graph->lower_info().operation.getRawPtr(index);
+ if (lower_info)
{
- auto found = operation_nodes.find(op_idx);
- if (found != operation_nodes.end())
+ auto fillcolor = backend_to_fillcolor(lower_info->backend());
+ std::string backend_label = "[" + lower_info->backend()->config()->id() + "]";
+ auto itr = operation_nodes.find(index);
+ if (itr != operation_nodes.end())
{
- auto &&op = found->second;
- op->setAttribute("fillcolor", fillcolor);
+ auto &node = itr->second;
+ node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label);
+ node->setAttribute("fillcolor", fillcolor);
}
}
});
public:
DotDumper(const ir::Graph &graph, Level level)
- : _lowered_graph{nullptr}, _graph(graph), _level{level}
+ : _lowered_graph{nullptr}, _graph(graph), _level{level}
{
}
DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
- : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
+ : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
{
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DotSubgraphInfo.h"
-
-#include <sstream>
-
-namespace onert
-{
-namespace dumper
-{
-namespace dot
-{
-
-DotSubgraphInfo::DotSubgraphInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq,
- const util::Set<ir::OperandIndex> &shown_operands,
- const ir::Operations &operations_ctx)
- : _index{index}
-{
- for (const auto &op_idx : op_seq.operations())
- {
- _operations.insert(op_idx);
- const auto &node = operations_ctx.at(op_idx);
- for (auto o : node.getInputs())
- {
- // Must be a shown operand, not op_seq's inputs
- if (shown_operands.contains(o) && !op_seq.getInputs().contains(o))
- {
- _operands.insert(o);
- }
- }
- for (auto o : node.getOutputs())
- {
- // Must be a shown operand, not op_seq's inputs
- if (shown_operands.contains(o) && !op_seq.getOutputs().contains(o))
- {
- _operands.insert(o);
- }
- }
- }
-}
-
-} // namespace dot
-} // namespace dumper
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
-#define __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
-
-#include <unordered_set>
-
-#include "ir/Index.h"
-#include <ir/Operations.h>
-#include "ir/OpSequence.h"
-#include "util/Set.h"
-
-namespace onert
-{
-namespace dumper
-{
-namespace dot
-{
-
-class DotSubgraphInfo
-{
-public:
- DotSubgraphInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq,
- const util::Set<ir::OperandIndex> &shown_operands,
- const ir::Operations &operations_ctx);
-
- ir::OpSequenceIndex index() const { return _index; }
- std::string label() const { return _label; }
- void label(const std::string &val) { _label = val; }
- std::string fillcolor() const { return _fillcolor; }
- void fillcolor(const std::string &val) { _fillcolor = val; }
- const std::unordered_set<ir::OperationIndex> &operations() const { return _operations; }
- const std::unordered_set<ir::OperandIndex> &operands() const { return _operands; }
-
-private:
- ir::OpSequenceIndex _index;
- std::string _label;
- std::string _fillcolor;
- std::unordered_set<ir::OperationIndex> _operations;
- std::unordered_set<ir::OperandIndex> _operands;
-};
-
-} // namespace dot
-} // namespace dumper
-} // namespace onert
-
-#endif // __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
#include "OperandNode.h"
#include "ir/Graph.h"
-#include "ir/operand/LowerInfo.h"
namespace onert
{
const std::string Operand::BG_COLOR_SCHEME = "set18";
Operand::Operand(const ir::OperandIndex &index, Type type)
- : Node{"operand" + std::to_string(index.value())}
+ : Node{"operand" + std::to_string(index.value())}
{
{
auto type_to_shape = [](Type type) {
*
* @param[in] index Operand index
* @param[in] type Operand type
- * @param[in] lower_info Operand LowerInfo
*/
Operand(const ir::OperandIndex &index, Type type);
#include "OperationNode.h"
#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
const std::string Operation::BG_COLOR_SCHEME = "pastel18";
Operation::Operation(const ir::OperationIndex &index, const ir::Operation &node)
- : Node{"operation" + std::to_string(index.value())}
+ : Node{"operation" + std::to_string(index.value())}
{
setAttribute("label", std::to_string(index.value()) + " : " + node.name());
setAttribute("shape", OPERATION_SHAPE);
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphDumper.h"
+
+#include "ir/Graph.h"
+#include "compiler/LoweredGraph.h"
+#include "util/logging.h"
+#include "misc/string_helpers.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace text
+{
+
+namespace
+{
+
+std::string formatOperandIndexSequence(const ir::OperandIndexSequence &seq)
+{
+ std::vector<std::string> strs;
+ for (auto ind : seq)
+ strs.push_back(dumper::text::formatOperandBrief(ind));
+ return nnfw::misc::join(strs.begin(), strs.end(), ", ");
+}
+
+} // namespace
+
+std::string formatOperandBrief(ir::OperandIndex ind)
+{
+ std::stringstream ss;
+ ss << ind;
+ return ss.str();
+}
+
+std::string formatOperand(const ir::Graph &, ir::OperandIndex ind)
+{
+ std::stringstream ss;
+ ss << ind;
+ // TODO Print shape, type and maybe more
+ return ss.str();
+}
+
+std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind)
+{
+ std::stringstream ss;
+ const auto &op = graph.operations().at(ind);
+
+ ss << formatOperandIndexSequence(op.getOutputs());
+ ss << " = ";
+ ss << ind << "_" << op.name() << "(";
+ ss << formatOperandIndexSequence(op.getInputs());
+ ss << ")";
+ return ss.str();
+}
+
+void dumpGraph(const ir::Graph &graph)
+{
+ VERBOSE(GraphDumper) << "{\n";
+ auto ops_topol = graph.topolSortOperations();
+ for (auto op_ind : ops_topol)
+ {
+ VERBOSE(GraphDumper) << " " << formatOperation(graph, op_ind) << "\n";
+ }
+ VERBOSE(GraphDumper) << "}\n";
+ VERBOSE(GraphDumper) << std::endl;
+}
+
+void dumpLoweredGraph(const compiler::LoweredGraph &lgraph)
+{
+ // TODO Graph dump with backend info
+ dumpGraph(lgraph.graph());
+}
+
+} // namespace text
+} // namespace dumper
+} // namespace onert
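For reference, dumpGraph() above emits one line per operation in topological order, of the form outputs = <operation index>_<operation name>(inputs), wrapped in braces. For a small two-operation graph the VERBOSE output would look roughly like the sketch below; the exact index rendering (shown here as %N for operands and @N for operations) depends on the Index stream operators and is an assumption.

{
  %2 = @0_Conv2D(%0, %1)
  %3 = @1_Relu(%2)
}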
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
+#define __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
+
+#include <ir/Index.h>
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+}
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+class LoweredGraph;
+}
+} // namespace onert
+
+namespace onert
+{
+namespace dumper
+{
+namespace text
+{
+
+std::string formatOperandBrief(ir::OperandIndex ind);
+std::string formatOperand(const ir::Graph &, ir::OperandIndex ind);
+std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind);
+void dumpGraph(const ir::Graph &graph);
+void dumpLoweredGraph(const compiler::LoweredGraph &lgraph);
+
+} // namespace text
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
{
auto &job = _waiting_jobs[id];
assert(job != nullptr);
- auto &op_seq = _lowered_graph->op_seqs().at(_job_to_op_seq[job->index()]);
- auto rank = calculateRank(op_seq.operations());
+ auto rank = calculateRank({_job_to_op[job->index()]});
_ready_jobs.emplace(rank, std::move(job));
}
}
DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs,
compiler::CodeMap &&code_map,
const util::TracingCtx *tracing_ctx)
- : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx},
- _code_map{std::move(code_map)}
+ : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx},
+ _code_map{std::move(code_map)}
{
VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
- const auto &op_seqs = _lowered_graph->op_seqs();
- // Assign jobs convert OpSequenceIndex to job index(uint32_t)
+ // Assign jobs: convert OperationIndex to job index (uint32_t)
uint32_t next_job_index = 0;
- std::unordered_map<ir::OpSequenceIndex, uint32_t> op_seq_to_job;
- op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &) {
- VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with OpSequenceIndex "
- << op_seq_index.value() << std::endl;
+ std::unordered_map<ir::OperationIndex, uint32_t> op_to_job;
+ const auto &operations = _lowered_graph->graph().operations();
+ operations.iterate([&](const ir::OperationIndex &op_ind, const ir::Operation &) {
+ VERBOSE(DataflowExecutor) << "Create a job " << next_job_index << " with Operation " << op_ind
+ << std::endl;
_finished_jobs.emplace_back(
- std::make_unique<Job>(next_job_index, _code_map.at(op_seq_index).fn_seq.get()));
- op_seq_to_job[op_seq_index] = next_job_index++;
+ std::make_unique<Job>(next_job_index, _code_map.at(op_ind).fn_seq.get()));
+ op_to_job[op_ind] = next_job_index++;
});
_waiting_jobs.resize(next_job_index);
_output_info.resize(next_job_index);
_initial_input_info.resize(next_job_index, 0);
- op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto job_index = op_seq_to_job[op_seq_index];
- for (auto output : op_seq.getOutputs())
+ operations.iterate([&](const ir::OperationIndex &op_ind, const ir::Operation &op) {
+ auto job_index = op_to_job[op_ind];
+ for (auto output : op.getOutputs())
{
// Update output and input info
- op_seqs.iterate(
- [&](const ir::OpSequenceIndex &op_seq_cur_index, const ir::OpSequence &op_seq_cur) {
- if (op_seq_cur.getInputs().contains(output))
- {
- auto dep_index = op_seq_to_job[op_seq_cur_index];
- ++_initial_input_info[dep_index];
- _output_info[job_index].push_back(dep_index);
- }
- });
+ operations.iterate([&](const ir::OperationIndex &op_cur_ind, const ir::Operation &op_cur) {
+ if (op_cur.getInputs().contains(output))
+ {
+ auto dep_index = op_to_job[op_cur_ind];
+ ++_initial_input_info[dep_index];
+ _output_info[job_index].push_back(dep_index);
+ }
+ });
}
});
- for (const auto &s : op_seq_to_job)
- _job_to_op_seq.emplace(s.second, s.first);
+ for (const auto &s : op_to_job)
+ _job_to_op.emplace(s.second, s.first);
_input_info = _initial_input_info;
}
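The constructor above precomputes, per job, how many producer jobs must finish before it becomes ready (_input_info) and which dependent jobs to notify on completion (_output_info). The standalone sketch below shows how such counters drive a dataflow-style execution loop; it uses plain STL types and hypothetical data, not the executor's real members.

#include <iostream>
#include <queue>
#include <vector>

int main()
{
  // Producer -> consumers edges, mirroring the role of _output_info.
  std::vector<std::vector<int>> output_info = {{1, 2}, {2}, {}};
  // Remaining unfinished producers per job, mirroring the role of _input_info.
  std::vector<int> input_info = {0, 1, 2};

  std::queue<int> ready;
  for (int job = 0; job < static_cast<int>(input_info.size()); ++job)
    if (input_info[job] == 0)
      ready.push(job);

  while (!ready.empty())
  {
    int job = ready.front();
    ready.pop();
    std::cout << "Run job " << job << std::endl; // runs 0, then 1, then 2

    // Notify consumers; a consumer becomes ready once all its producers have finished.
    for (int dep : output_info[job])
      if (--input_info[dep] == 0)
        ready.push(dep);
  }
}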
auto job = std::move((_ready_jobs.begin())->second);
_ready_jobs.erase(_ready_jobs.begin());
auto job_index = job->index();
- VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl;
+ VERBOSE(DataflowExecutor) << "Run job " << job_index << std::endl;
- auto op_seq_index = _job_to_op_seq[job_index];
- auto op_seq = &_lowered_graph->op_seqs().at(op_seq_index);
- const backend::Backend *backend =
- _lowered_graph->getLowerInfo()->op_seq.at(op_seq_index)->backend();
+ auto op_ind = _job_to_op[job_index];
+ const backend::Backend *backend = _lowered_graph->lower_info().operation.at(op_ind).backend();
- _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
+ _subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend);
job->fn_seq()->initRunning();
// check if FunctionSequence needs to handle dynamic tensor
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists;
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists;
job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
job->run();
- _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend);
notify(job_index);
_finished_jobs[job_index] = std::move(job);
}
*
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map Map from @c ir::OperationIndex to its code
*/
DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
const util::TracingCtx *tracing_ctx);
std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs;
/// @brief Which job runs which op and function.
- std::unordered_map<uint32_t, ir::OpSequenceIndex> _job_to_op_seq;
+ std::unordered_map<uint32_t, ir::OperationIndex> _job_to_op;
};
} // namespace exec
So, only when all inputs are static, we can skip dynamic shape inference.
*/
- if ((!lhs->is_dynamic()) && (!rhs->is_dynamic()))
- return;
-
auto output_idx = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_idx);
+ if ((currently_static(lhs) && currently_static(rhs)) && previously_static(output))
+ return;
+
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
output->applyShape(new_shape);
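// Illustration only (not onert code): the new early-return above skips shape
// inference only when both inputs are currently static AND the output was
// static on the previous run; an output left dynamic by an earlier run still
// needs its shape re-inferred. A tiny standalone sketch of that condition:
#include <cassert>

bool can_skip_shape_inference(bool lhs_static_now, bool rhs_static_now, bool output_was_static)
{
  return lhs_static_now && rhs_static_now && output_was_static;
}

int main()
{
  assert(can_skip_shape_inference(true, true, true));
  // Inputs are static again, but the output shape is stale from a dynamic run: must infer.
  assert(!can_skip_shape_inference(true, true, false));
  return 0;
}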
const auto &input = _tensor_registry->getITensor(input_idx);
const auto cluster_idx{
- op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
const auto &cluster = _tensor_registry->getITensor(cluster_idx);
assert(cluster->is_constant());
assert(cluster_buf);
ir::Shape new_shape =
- shape_inference::inferBCQFullyConnectedShape(input_shape, cluster_shape, cluster_buf);
+ shape_inference::inferBCQFullyConnectedShape(input_shape, cluster_shape, cluster_buf);
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
assert(shape); // It shouldn't be 0.
auto output_shape = shape_inference::inferBroadcastToShape(
- shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
+ shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
// set output shape and output buffer
output->applyShape(output_shape);
{
auto isConcatible = [](const backend::ITensor *input1, const backend::ITensor *input2,
int32_t axis) {
- if (input1->num_dimensions() != input2->num_dimensions())
+ auto shape1 = input1->getShape();
+ auto shape2 = input2->getShape();
+ if (shape1.rank() != shape2.rank())
return false;
- for (size_t i = 0; i < input1->num_dimensions(); i++)
+ for (int i = 0; i < shape1.rank(); i++)
{
- auto positive_axis = (axis >= 0) ? axis : axis + input1->num_dimensions();
+ auto positive_axis = (axis >= 0) ? axis : axis + input1->getShape().rank();
if (i != positive_axis)
- if (input1->dimension(i) != input2->dimension(i))
+ if (shape1.dim(i) != shape2.dim(i))
return false;
}
assert(axis->buffer());
int32_t axis_value =
- (axis_type == ir::DataType::INT32)
- ? reinterpret_cast<const int32_t *>(axis->buffer())[0]
- : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]);
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis->buffer())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]);
auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value);
const auto &dims_shape = shape->getShape();
auto output_shape = ((dims_type == ir::DataType::INT32)
- ? shape_inference::inferFillShape<int32_t>(
- dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
- : shape_inference::inferFillShape<int64_t>(
- dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
output->applyShape(output_shape);
assert(output->buffer() != nullptr);
auto output = _tensor_registry->getITensor(output_index);
const auto output_state_out_index{
- op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
const auto input_shape = input->getShape();
const auto input_to_output_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto input_to_output_weights = _tensor_registry->getITensor(input_to_output_weights_index);
const auto input_to_output_weights_shape = input_to_output_weights->getShape();
const auto recurrent_to_output_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto recurrent_to_output_weights =
- _tensor_registry->getITensor(recurrent_to_output_weights_index);
+ _tensor_registry->getITensor(recurrent_to_output_weights_index);
const auto recurrent_to_output_weights_shape = recurrent_to_output_weights->getShape();
// re-sizing outputs
const int n_batch =
- (input_shape.rank() == 3 && op.param().time_major) ? input_shape.dim(1) : input_shape.dim(0);
+ (input_shape.rank() == 3 && op.param().time_major) ? input_shape.dim(1) : input_shape.dim(0);
const int n_cell = input_to_output_weights_shape.dim(0);
const int n_output = recurrent_to_output_weights_shape.dim(1);
if (input_shape.rank() == 3)
if (scratch_buffer != nullptr)
{
const auto input_to_input_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
const auto input_to_input_weights_shape =
- _tensor_registry->getITensor(input_to_input_weights_index)->getShape();
+ _tensor_registry->getITensor(input_to_input_weights_index)->getShape();
bool has_input_to_input_weights =
- input_to_input_weights_shape.dim(0) != 0 && input_to_input_weights_shape.dim(1) != 0;
+ input_to_input_weights_shape.dim(0) != 0 && input_to_input_weights_shape.dim(1) != 0;
const auto recurrent_to_input_weights_shape =
- _tensor_registry->getITensor(recurrent_to_input_weights_index)->getShape();
- bool has_recurrent_to_input_weights = recurrent_to_input_weights_shape.dim(0) != 0 &&
- recurrent_to_input_weights_shape.dim(1) != 0;
+ _tensor_registry->getITensor(recurrent_to_input_weights_index)->getShape();
+ bool has_recurrent_to_input_weights =
+ recurrent_to_input_weights_shape.dim(0) != 0 && recurrent_to_input_weights_shape.dim(1) != 0;
// NOTE cell_to_input_weights does not exist in non-peephole LSTM, even for regular (non-CIFG) LSTM.
// true: no CIFG
assert(pad_buf);
auto output_shape =
- shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
+ shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
// change output shape and reallocate output tensor memory
output->applyShape(output_shape);
if (output->data_type() == ir::DataType::FLOAT32)
{
new_shape =
- shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()),
- *reinterpret_cast<float *>(limit_tensor->buffer()),
- *reinterpret_cast<float *>(delta_tensor->buffer()));
+ shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()),
+ *reinterpret_cast<float *>(limit_tensor->buffer()),
+ *reinterpret_cast<float *>(delta_tensor->buffer()));
}
else if (output->data_type() == ir::DataType::INT32)
{
new_shape = shape_inference::inferRangeShape<int32_t>(
- *reinterpret_cast<int32_t *>(start_tensor->buffer()),
- *reinterpret_cast<int32_t *>(limit_tensor->buffer()),
- *reinterpret_cast<int32_t *>(delta_tensor->buffer()));
+ *reinterpret_cast<int32_t *>(start_tensor->buffer()),
+ *reinterpret_cast<int32_t *>(limit_tensor->buffer()),
+ *reinterpret_cast<int32_t *>(delta_tensor->buffer()));
}
output->applyShape(new_shape);
assert(output->buffer() != nullptr);
assert(new_shape_buf);
auto output_shape = shape_inference::inferReshapeShape(
- new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements());
+ new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements());
// if shape is changed, change output shape and reallocate output tensor memory
if (output_shape != output->getShape() || output->buffer() == nullptr)
width_out = op.param().width_out;
}
auto output_shape =
- shape_inference::inferResizeBilinearShape(input->getShape(), height_out, width_out);
+ shape_inference::inferResizeBilinearShape(input->getShape(), height_out, width_out);
// if shape is changed, change output shape and reallocate output tensor memory
if (output_shape != output->getShape() || output->buffer() == nullptr)
// Select output shape
ir::Shape new_shape =
- shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
+ shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
auto padding_data = reinterpret_cast<int32_t *>(padding->buffer());
ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
- input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
+ input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
output->applyShape(new_shape);
assert(output->buffer() != nullptr);
const auto rank = input_shape.rank();
auto op_params = shape_inference::buildStridedSliceParams(
- reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()),
- reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask,
- rank);
+ reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()),
+ reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask, rank);
auto output_index = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_index);
ir::Shape output_shape =
- onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
+ onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
output->applyShape(output_shape);
assert(output->buffer() != nullptr);
auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier->buffer());
assert(multiplier_buffer);
- auto output_shape =
- shape_inference::inferTileShape(input_shape, multiplier_buffer, multiplier->dimension(0));
+ auto mult_shape = multiplier->getShape();
+ auto output_shape = shape_inference::inferTileShape(
+ input_shape, multiplier_buffer, mult_shape.rank() == 0 ? 1 : mult_shape.dim(0));
// set output shape and output buffer
output->applyShape(output_shape);
ir::Shape new_shape;
// TODO Change perm->getShape().dim(0) == 0 to perm->num_elements() == 0
- if (perm->dimension(0) == 0) // This condition means that perm is (n-1...0)
+ if (perm->getShape().dim(0) == 0) // This condition means that perm is (n-1...0)
{
// Call by (n-1...0)
new_shape = shape_inference::inferTransposeShape(input_shape, nullptr, 0);
else
{
// Check rank
- if (input->num_dimensions() != perm->getShape().num_elements())
+ if (static_cast<size_t>(input->getShape().rank()) != perm->getShape().num_elements())
{
throw std::runtime_error("DynamicShapeInferer failed, bad rank size: " +
std::to_string(perm->getShape().num_elements()));
// set output shape, based on input and params
const auto perm_buffer = reinterpret_cast<const int32_t *>(perm->buffer());
- new_shape = shape_inference::inferTransposeShape(input_shape, perm_buffer, perm->dimension(0));
+ new_shape =
+ shape_inference::inferTransposeShape(input_shape, perm_buffer, perm->getShape().dim(0));
}
output->applyShape(new_shape);
assert(output->buffer() != nullptr);
{
public:
explicit ExecTime(const std::vector<const backend::Backend *> &backends)
- : _json(backends, _measurements)
+ : _json(backends, _measurements)
{
}
_io_desc.dynamic_input_shapes[index] = new_shape;
VERBOSE(Execution) << "Model input shape will be changed at the start of execute()"
- << "(index: " << index.value() << ")" << std::endl;
+ << "(index: " << index << ")" << std::endl;
}
// TODO Remove default parameter
// note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo()
{
auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index);
- auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end())
- ? input_shape_sig->second.num_elements() *
- onert::ir::sizeOfDataType(info.typeInfo().type())
- : info.total_size();
+ auto size_required =
+ (input_shape_sig != _io_desc.dynamic_input_shapes.end())
+ ? input_shape_sig->second.num_elements() * onert::ir::sizeOfDataType(info.typeInfo().type())
+ : info.total_size();
if (length < size_required)
{
{
const auto &input_desc = _io_desc.inputs.at(index.value());
_io_desc.inputs.at(index.value()) =
- std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout);
+ std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout);
}
void Execution::setOutputLayout(const ir::IOIndex &index, ir::Layout layout)
{
const auto &output_desc = _io_desc.outputs.at(index.value());
- _io_desc.outputs.at(index.value()) = std::make_unique<OutputDesc>(
- output_desc->info, output_desc->buffer, output_desc->size, layout);
+ _io_desc.outputs.at(index.value()) =
+ std::make_unique<OutputDesc>(output_desc->info, output_desc->buffer, output_desc->size, layout);
}
void Execution::execute()
auto itr = _io_desc.dynamic_input_shapes.find(ind);
if (itr == _io_desc.dynamic_input_shapes.end())
{
- auto operand_idx = primary_subgraph().getInputs().at(ind.value());
+ auto operand_idx = primary_subgraph().getInputs().at(ind);
return primary_subgraph().operands().at(operand_idx).shape();
}
else
}
}
-void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index,
- const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
for (auto &o : _observers)
{
- o->handleJobBegin(executor, index, op_seq, backend);
+ o->handleJobBegin(executor, subg_ind, op_ind, backend);
}
}
-void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index,
- const ir::OpSequence *op_seq, const backend::Backend *backend)
+void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
for (auto &o : _observers)
{
- o->handleJobEnd(executor, index, op_seq, backend);
+ o->handleJobEnd(executor, subg_ind, op_ind, backend);
}
}
void add(std::unique_ptr<IExecutionObserver> observer);
void notifySubgraphBegin(ir::SubgraphIndex ind);
void notifySubgraphEnd(ir::SubgraphIndex ind);
- void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq,
+ void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind, ir::OperationIndex op_ind,
const backend::Backend *backend);
- void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq,
+ void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind, ir::OperationIndex op_ind,
const backend::Backend *backend);
private:
#include "util/logging.h"
#include "exec/IExecutor.h"
#include "misc/polymorphic_downcast.h"
-#include "ir/OpSequence.h"
+#include "ir/Operation.h"
#include "util/EventWriter.h"
-#include "util/Utils.h"
namespace
{
-void setUserData(const onert::ir::Graph &g, const onert::ir::OpSequence *op_seq,
+void setUserData(const onert::ir::Graph &g, const onert::ir::Operation *op,
decltype(EventCollector::Event::userData) &data)
{
- if (op_seq->size() == 0)
- return;
-
// From a tensor of shape [a, b, c], this will return a string "shape(a b c)".
// String like "[1, 2, 3]" looks better but this will be considered as a list in Json
// so text search (e.g., Ctrl-F in Chrome Tracing) could be difficult
return shape_str;
};
- const auto &first_op_idx = op_seq->operations().at(0);
- const auto &first_op_node = g.operations().at(first_op_idx);
-
- auto &inputs = first_op_node.getInputs();
+ auto &inputs = op->getInputs();
auto size = inputs.size();
for (size_t i = 0; i < size; i++)
{
{
void ProfileObserver::handleJobBegin(onert::exec::IExecutor *, ir::SubgraphIndex,
- const ir::OpSequence *, const onert::backend::Backend *backend)
+ ir::OperationIndex, const onert::backend::Backend *backend)
{
_timer = backend->config()->timer();
if (_timer == nullptr)
_timer->handleBegin();
}
-void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex,
+ const ir::OperationIndex op_ind, const backend::Backend *backend)
{
_timer->handleEnd();
const auto timer_res = _timer->getTime();
- // NOTE This assumes there is just one operation in a op_seq
- const auto &node = _graph.operations().at(op_seq->operations().at(0));
+ // NOTE A job now maps to exactly one operation, so op_ind identifies it directly
+ const auto &node = _graph.operations().at(op_ind);
auto node_name = node.name();
VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl;
TracingObserver::TracingObserver(const std::string &filepath, const ir::Graph &graph,
const util::TracingCtx *tracing_ctx)
- : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph},
- _tracing_ctx{tracing_ctx}
+ : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph},
+ _tracing_ctx{tracing_ctx}
{
- // TODO Remove below after using _tracing_ctx
- UNUSED_RELEASE(_tracing_ctx);
-
_event_writer = EventWriter::get(filepath);
_event_writer->startToUse();
}
void TracingObserver::handleSubgraphBegin(ir::SubgraphIndex subg_ind)
{
- // TODO Write subg_ind into profling result
- UNUSED_RELEASE(subg_ind);
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"});
+ _collector.onEvent(
+ EventCollector::SubgEvent{_tracing_ctx, EventCollector::Edge::BEGIN, subg_ind.value()});
}
void TracingObserver::handleJobBegin(IExecutor *, ir::SubgraphIndex subg_ind,
- const ir::OpSequence *op_seq, const backend::Backend *backend)
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
- // TODO Write subg_ind into profling result
- UNUSED_RELEASE(subg_ind);
-
std::string backend_id = backend->config()->id();
-
- auto ev = EventCollector::Event{EventCollector::Edge::BEGIN, backend_id,
- opSequenceTag(op_seq, _graph.operations())};
+ const auto &op = _graph.operations().at(op_ind);
+ auto ev = EventCollector::OpSeqEvent{_tracing_ctx, EventCollector::Edge::BEGIN,
+ subg_ind.value(), backend_id,
+ op_ind.value(), op.name()};
// add shape of inputs
- setUserData(_graph, op_seq, ev.userData);
-
+ setUserData(_graph, &op, ev.userData);
_collector.onEvent(ev);
}
void TracingObserver::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_ind,
- const ir::OpSequence *op_seq, const backend::Backend *backend)
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
- // TODO Write subg_ind into profling result
- UNUSED_RELEASE(subg_ind);
-
std::string backend_id = backend->config()->id();
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, backend_id,
- opSequenceTag(op_seq, _graph.operations())});
+ _collector.onEvent(EventCollector::OpSeqEvent{_tracing_ctx, EventCollector::Edge::END,
+ subg_ind.value(), backend_id, op_ind.value(),
+ _graph.operations().at(op_ind).name()});
}
void TracingObserver::handleSubgraphEnd(ir::SubgraphIndex subg_ind)
{
- // TODO Write subg_ind into profling result
- UNUSED_RELEASE(subg_ind);
-
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"});
-}
-
-std::string TracingObserver::opSequenceTag(const ir::OpSequence *op_seq,
- const ir::Operations &operations)
-{
- if (op_seq->size() == 0)
- return "Empty OpSequence";
-
- const auto &first_op_idx = op_seq->operations().at(0);
- const auto &first_op_node = operations.at(first_op_idx);
- std::string tag = "$" + std::to_string(first_op_idx.value());
- tag += " " + first_op_node.name();
- if (op_seq->size() > 1)
- {
- tag += " (+" + std::to_string(op_seq->size() - 1) + ")";
- }
- return tag;
+ _collector.onEvent(
+ EventCollector::SubgEvent{_tracing_ctx, EventCollector::Edge::END, subg_ind.value()});
}
} // namespace exec
#include "exec/IFunction.h"
#include "ir/Index.h"
-#include "ir/OpSequence.h"
+#include "ir/Operation.h"
#include "ExecTime.h"
#include "util/ITimer.h"
#include "exec/IExecutor.h"
/// @brief Invoked just before model (not individual operation) execution begins
virtual void handleSubgraphBegin(ir::SubgraphIndex) { return; }
- virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) = 0;
- virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) = 0;
/// @brief Invoked just after model (not individual operation) execution ends
{
public:
explicit ProfileObserver(std::shared_ptr<ExecTime> et, const ir::Graph &graph)
- : _et(std::move(et)), _graph(graph)
+ : _et(std::move(et)), _graph(graph)
{
}
- void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) override;
- void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) override;
void handleSubgraphEnd(ir::SubgraphIndex) override { _et->storeOperationsExecTime(); }
const util::TracingCtx *tracing_ctx);
~TracingObserver();
void handleSubgraphBegin(ir::SubgraphIndex) override;
- void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) override;
- void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
const backend::Backend *) override;
void handleSubgraphEnd(ir::SubgraphIndex) override;
-private:
- static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations);
-
private:
std::unique_ptr<EventRecorder> _recorder;
EventCollector _collector;
#include "ExecutorBase.h"
#include "ShapeConverter.h"
-#include "backend/controlflow/UserTensor.h"
+#include "backend/builtin/UserTensor.h"
#include "util/logging.h"
#include "misc/polymorphic_downcast.h"
{
ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs,
const util::TracingCtx *tracing_ctx)
- : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, _mutex(),
- _tracing_ctx(tracing_ctx)
+ : _lowered_graph{std::move(lowered_graph)},
+ _backend_contexts{std::move(backend_contexts)}, _graph{_lowered_graph->graph()}, _mutex(),
+ _tracing_ctx(tracing_ctx)
{
auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
assert(tensors.empty());
{
backend::ITensor *tensor = tensor_regs.getITensor(ind);
assert(tensor != nullptr);
- auto io_tensor = nnfw::misc::polymorphic_downcast<backend::controlflow::IOTensor *>(tensor);
+ auto io_tensor = nnfw::misc::polymorphic_downcast<backend::builtin::IOTensor *>(tensor);
tensors.push_back(io_tensor);
}
};
{
const auto orig_input_shape = input_tensor->orig_info().shape();
const auto changed_input_shape =
- convertShape(input->getShape(), input->layout(), input_tensor->orig_layout());
+ convertShape(input->getShape(), input->layout(), input_tensor->orig_layout());
+ if (input_tensor->get_info().shape() != changed_input_shape)
+ {
+ // TODO Fix this workaround, introduced because cpu-based kernels use `_info` directly
+ // rather than interface methods, in order to avoid virtual function calls.
+ input_tensor->setShapeOfIPortableTensor(changed_input_shape);
+ }
if (orig_input_shape != changed_input_shape)
{
input_tensor->set_dynamic();
// set shape of outputDesc to tensor shape since tensor can be dynamic
const auto output_tensor_shape = _output_tensors[n]->getShape();
output.info.shape(
- convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
+ convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
}
}
#include "exec/IODescription.h"
#include "ir/Graph.h"
#include "ir/Index.h"
-#include "ir/LowerInfoMap.h"
+#include "compiler/GraphLowerInfo.h"
#include "ir/OperationIndexMap.h"
#include "compiler/LoweredGraph.h"
#include "compiler/TensorRegistries.h"
-#include "backend/controlflow/IOTensor.h"
+#include "backend/builtin/IOTensor.h"
#include "util/TracingCtx.h"
#include <cstdint>
* @param tensor_builders Tensor builders that are currently used
*/
ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx);
virtual ~ExecutorBase() = default;
void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
- const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const override
+ const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override
{
return _output_tensors;
}
ExecutionObservee _subject;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
+ backend::BackendContexts _backend_contexts;
const ir::Graph &_graph;
- std::vector<backend::controlflow::IOTensor *> _input_tensors;
- std::vector<backend::controlflow::IOTensor *> _output_tensors;
+ std::vector<backend::builtin::IOTensor *> _input_tensors;
+ std::vector<backend::builtin::IOTensor *> _output_tensors;
std::mutex _mutex;
const util::TracingCtx *_tracing_ctx;
#include "exec/FunctionSequence.h"
#include "ir/Operation.h"
-#include "backend/IDynamicTensorManager.h"
#include "backend/ITensorRegistry.h"
#include "util/logging.h"
// acl_cl and acl_neon backend don't support dynamic shape.
// _dynamic_tensor_ctx is always nullptr for acl_cl and acl_neon
// Thus, those two backends cannot reach here.
- if (_dynamic_tensor_ctx->op_seq->size() != _functions.size())
- throw std::runtime_error("operation and functions should be mapped one by one");
- auto op_seq_iter = _dynamic_tensor_ctx->op_seq->begin();
+ // Do dynamic shape inference
+ auto op_ind = _dynamic_tensor_ctx->op_ind;
+ auto &op = _dynamic_tensor_ctx->operations->at(op_ind);
+ op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
+
for (const auto &function : _functions)
{
- // set shape of output and allocate memory when needed
- auto &op = _dynamic_tensor_ctx->operations->at(*op_seq_iter);
- op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
-
+ // NOTE The function could itself be a FunctionSequence, so handle that case here
+ // TODO Remove this or do this recursively
auto *sub_func_seq = dynamic_cast<FunctionSequence *>(function.get());
if (sub_func_seq != nullptr)
{
// run kernel
function->run();
-
- // deallocate input tensors which is no longer used
- _dynamic_tensor_ctx->dynamic_tensor_manager->deallocInput(*op_seq_iter);
-
- op_seq_iter++;
}
}
else
auto &dst_offsets = _dst_tensors_offsets.at(i);
if (src_tensor != dst_tensor)
{
- const auto rank = src_tensor->num_dimensions();
+ const auto rank = src_tensor->getShape().rank();
permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
}
}
case PermuteType::NHWC_TO_NCHW:
{
ir::FeatureShape shape;
- shape.N = dst->dimension(0);
- shape.C = dst->dimension(1);
- shape.H = dst->dimension(2);
- shape.W = dst->dimension(3);
+ auto dst_shape = dst->getShape();
+ shape.N = dst_shape.dim(0);
+ shape.C = dst_shape.dim(1);
+ shape.H = dst_shape.dim(2);
+ shape.W = dst_shape.dim(3);
typename feature::nchw::View<T>::Strides strides;
const auto start_offset = dst->calcOffset({0, 0, 0, 0});
- strides.W = dst->dimension(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
- strides.H = dst->dimension(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
- strides.C = dst->dimension(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
- strides.N = dst->dimension(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
+ strides.W = dst_shape.dim(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
+ strides.H = dst_shape.dim(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
+ strides.C = dst_shape.dim(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
+ strides.N = dst_shape.dim(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
const feature::nhwc::Reader<T> from(src);
feature::nchw::View<T> into(shape, strides,
case PermuteType::NCHW_TO_NHWC:
{
ir::FeatureShape shape;
- shape.N = dst->dimension(0);
- shape.H = dst->dimension(1);
- shape.W = dst->dimension(2);
- shape.C = dst->dimension(3);
+ auto dst_shape = dst->getShape();
+ shape.N = dst_shape.dim(0);
+ shape.H = dst_shape.dim(1);
+ shape.W = dst_shape.dim(2);
+ shape.C = dst_shape.dim(3);
typename feature::nhwc::View<T>::Strides strides;
const auto start_offset = dst->calcOffset({0, 0, 0, 0});
- strides.C = dst->dimension(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
- strides.W = dst->dimension(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
- strides.H = dst->dimension(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
- strides.N = dst->dimension(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
+ strides.C = dst_shape.dim(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
+ strides.W = dst_shape.dim(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
+ strides.H = dst_shape.dim(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
+ strides.N = dst_shape.dim(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
const feature::nchw::Reader<T> from(src);
feature::nhwc::View<T> into(shape, strides,
* _measurements[Backend*]["string"][bool][uint32_t] = int64_t
*/
using MeasurementData = std::unordered_map<
- const backend::Backend *,
- std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;
+ const backend::Backend *,
+ std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;
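// Illustration only (not onert code): how the nested MeasurementData above is
// meant to be indexed. Reading the surrounding header comment, the levels appear
// to be backend -> operation name -> quantized flag -> operation size -> time;
// the FakeBackend type and the concrete values below are made up for the example.
#include <cstdint>
#include <map>
#include <string>
#include <unordered_map>

struct FakeBackend
{
};

using MeasurementData = std::unordered_map<
  const FakeBackend *,
  std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;

int main()
{
  FakeBackend cpu;
  MeasurementData m;
  m[&cpu]["Conv2D"][/*quantized=*/false][/*op size=*/1024] = 350; // e.g. microseconds
  return 0;
}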
class JSON
{
public:
explicit JSON(const std::vector<const backend::Backend *> &backends,
MeasurementData &measurements)
- : _measurement_file("exec_time.json"), _backends(), _measurements(measurements)
+ : _measurement_file("exec_time.json"), _backends(), _measurements(measurements)
{
for (const auto b : backends)
{
namespace exec
{
-#ifdef RUY_PROFILER
-namespace
-{
-char *seq_to_label(const onert::ir::OpSequence *op_seq, const onert::ir::Operations &operations)
-{
- auto node_name = operations.at(*op_seq->begin()).name();
- char *cstr = new char[node_name.length() + 1];
- std::strcpy(cstr, node_name.c_str());
- return cstr;
-}
-} // namespace
-#endif
-
void LinearExecutor::executeImpl()
{
auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
_subject.notifySubgraphBegin(profiling_subg_index);
for (auto &&code : _code)
{
- const auto op_seq = code.op_seq;
const auto backend = code.lower_info->backend();
// TODO : Move ruy profiler into ExecutionObserver
#ifdef RUY_PROFILER
- ruy::profiler::ScopeLabel label(seq_to_label(op_seq, _graph.operations()));
+ ruy::profiler::ScopeLabel label(code.op->name());
#endif
- _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
+ _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend);
auto &fn_seq = code.fn_seq;
fn_seq->initRunning();
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || hasDynamicInput();
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(code.op_ind) || hasDynamicInput();
fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
fn_seq->run();
- _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend);
}
_subject.notifySubgraphEnd(profiling_subg_index);
}
* @brief Construct a new LinearExecutor object
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map Map from @c ir::OperationIndex to its code
*/
LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
- const std::vector<ir::OpSequenceIndex> &order, const util::TracingCtx *tracing_ctx)
- : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx}
+ const std::vector<ir::OperationIndex> &order, const util::TracingCtx *tracing_ctx)
+ : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx}
{
for (auto index : order)
{
public:
HookFunction(IFunction *fn, const std::function<void()> &setup,
const std::function<void()> &teardown)
- : _fn{fn}, _setup{setup}, _teardown{teardown}
+ : _fn{fn}, _setup{setup}, _teardown{teardown}
{
}
}
ParallelExecutor::ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs,
compiler::CodeMap &&code_map,
const util::TracingCtx *tracing_ctx)
- : DataflowExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), tracing_ctx}
+ : DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
+ std::move(code_map), tracing_ctx}
{
VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
}
bool dynamic_input_exists = hasDynamicInput();
// Init scheduler
- // TODO Consider to have distinct backend set in LowerInfoMap
+ // TODO Consider to have distinct backend set in GraphLowerInfo
BackendSet backends;
- for (auto &itr : _lowered_graph->getLowerInfo()->op_seq)
- {
- backends.add(itr.second->backend());
- }
+ _lowered_graph->lower_info().operation.iterate(
+ [&](const ir::OperationIndex &, const compiler::OperationLowerInfo &lower_info) {
+ backends.add(lower_info.backend());
+ });
_scheduler = std::make_unique<ParallelScheduler>(backends);
assert(noWaitingJobs());
lock.unlock();
- VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl;
+ VERBOSE(ParallelExecutor) << "Assigning fn " << job->index() << std::endl;
auto job_index = job->index();
- auto op_sequence_index = _job_to_op_seq[job_index];
- auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index);
- auto backend = _lowered_graph->getLowerInfo()->op_seq.at(op_sequence_index)->backend();
- auto setup = [&, op_seq, backend]() {
- _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
+ auto op_ind = _job_to_op[job_index];
+ auto backend = _lowered_graph->lower_info().operation.at(op_ind).backend();
+ auto setup = [&, op_ind, backend]() {
+ _subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend);
};
- auto teardown = [&, job_index, op_seq, backend]() {
- _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
+ auto teardown = [&, job_index, op_ind, backend]() {
+ _subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend);
notify(job_index);
};
job->fn_seq()->initRunning();
// dynamic tensor setting
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists;
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists;
job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
_scheduler->assign(std::make_unique<HookFunction>(job->fn_seq(), setup, teardown), backend);
*
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map Map from @c ir::OperationIndex to its code
*/
ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
const util::TracingCtx *tracing_ctx);
using Strides = ir::FeatureShape;
// Construct for buffer and strides
Reader(const ir::FeatureShape &shape, const Strides &strides, const T *ptr, size_t len)
- : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
+ : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
{
UNUSED_RELEASE(len); // Workaround for unused variable in release mode
assert(len == static_cast<size_t>(strides.N != 0
- ? shape.N * strides.N
- : strides.C != 0 ? shape.C * strides.C
- : strides.H != 0 ? shape.H * strides.H
- : shape.W * strides.W));
+ ? shape.N * strides.N
+ : strides.C != 0 ? shape.C * strides.C
+ : strides.H != 0 ? shape.H * strides.H
+ : shape.W * strides.W));
}
// Construct for backend tensor
Reader(backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
{
assert(tensor->layout() == ir::Layout::NCHW);
const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.W = tensor->dimension(3);
- _shape.H = tensor->dimension(2);
- _shape.C = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ auto shape = tensor->getShape();
+ _strides.W = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.H = shape.dim(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.C = shape.dim(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = shape.dim(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.W = shape.dim(3);
+ _shape.H = shape.dim(2);
+ _shape.C = shape.dim(1);
+ _shape.N = shape.dim(0);
}
public:
using Strides = typename Reader<T>::Strides;
// Construct for buffer of model inputs
View(const ir::FeatureShape &shape, const Strides &strides, T *ptr, size_t len)
- : Reader<T>{shape, strides, ptr, len}
+ : Reader<T>{shape, strides, ptr, len}
{
// DO NOTHING
}
using Strides = ir::FeatureShape;
// Construct for buffer and strides
Reader(const ir::FeatureShape &shape, const Strides &strides, const T *ptr, size_t len)
- : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
+ : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
{
UNUSED_RELEASE(len); // Workaround for unused variable in release mode
assert(len == static_cast<size_t>(strides.N != 0
- ? shape.N * strides.N
- : strides.H != 0 ? shape.H * strides.H
- : strides.W != 0 ? shape.W * strides.W
- : shape.C * strides.C));
+ ? shape.N * strides.N
+ : strides.H != 0 ? shape.H * strides.H
+ : strides.W != 0 ? shape.W * strides.W
+ : shape.C * strides.C));
}
// Construct for backend tensor
Reader(const backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
{
assert(tensor->layout() == ir::Layout::NHWC);
const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.C = tensor->dimension(3);
- _shape.W = tensor->dimension(2);
- _shape.H = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ auto shape = tensor->getShape();
+ _strides.C = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.W = shape.dim(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.H = shape.dim(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = shape.dim(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.C = shape.dim(3);
+ _shape.W = shape.dim(2);
+ _shape.H = shape.dim(1);
+ _shape.N = shape.dim(0);
}
public:
using Strides = typename Reader<T>::Strides;
// Construct for buffer and strides
View(const ir::FeatureShape &shape, const Strides &strides, T *ptr, size_t len)
- : Reader<T>{shape, strides, ptr, len}
+ : Reader<T>{shape, strides, ptr, len}
{
// DO NOTHING
}
auto input_tensor = std::make_shared<ROTensor>(input->info);
input_tensor->setData(std::make_shared<const ir::ExternalData>(
- reinterpret_cast<const uint8_t *>(input->buffer), input->size));
+ reinterpret_cast<const uint8_t *>(input->buffer), input->size));
tensor_map[input_index] = input_tensor;
}
{
if (tensor_map.find(index) != tensor_map.end())
{
- VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index.value() << std::endl;
+ VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index << std::endl;
interp_env->assignTensor(index, tensor_map.at(index));
}
}
<< std::endl;
interp_env->assignExternalBuffer(
- output_index, std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output->buffer),
- output->size));
+ output_index,
+ std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output->buffer), output->size));
}
// Allocate constant tensor
_graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (obj.isConstant())
{
- VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value()
+ VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind
<< std::endl;
assert(obj.data());
auto const_tensor = std::make_shared<ROTensor>(obj.info());
// Assume that interpreter's tensor layout is same with model (NHWC)
const_tensor->setData(
- std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size()));
+ std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size()));
interp_env->assignTensor(ind, const_tensor);
}
});
*/
const ir::Graph &graph() final { return _graph; }
void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
- // Not implemented
+ // Not implemented
};
/**
* @brief Start execution
{
throw new std::runtime_error{"Interpreter does not support subgraph calls(control flow ops)"};
}
- const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const final
+ const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const final
{
throw new std::runtime_error{"Interpreter does not support this function."};
}
const ir::Operation &node = _env->graph().operations().at(idx);
const auto nodeName = node.name();
VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
- << " operation (id: " << idx.value() << ")" << std::endl;
+ << " operation (id: " << idx << ")" << std::endl;
const auto nodeOpCode = node.opcode();
if (_kernels.find(nodeOpCode) == _kernels.end())
// But that scenario may not exist
for (auto ind : _env->graph().getInputs())
{
- VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl;
+ VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind << std::endl;
operand_stack.push(ind);
}
_env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (obj.isConstant())
{
- VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl;
+ VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind << std::endl;
operand_stack.push(ind);
}
if (operator_ready)
{
- VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl;
+ VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator << std::endl;
operation_stack.push(use_operator);
}
}
{
const auto current_operation_index = operation_stack.top();
operation_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "("
+ VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index << "("
<< _env->graph().operations().at(current_operation_index).name() << ")"
<< std::endl;
return ir::Layout::NHWC;
}
+ir::Shape Tensor::getShape() const { return _info.shape(); }
+
+ir::Shape ROTensor::getShape() const { return _info.shape(); }
+
} // namespace interp
} // namespace onert
virtual void releaseData() = 0;
virtual size_t total_size() const = 0;
- virtual size_t dimension(size_t index) const = 0;
- virtual size_t num_dimensions() const = 0;
virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
virtual bool has_padding() const = 0;
void releaseData() override { _data = nullptr; }
size_t total_size() const override { return _info.total_size(); }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
size_t calcOffset(const ir::Coordinates &coords) const override;
ir::Layout layout() const override;
bool is_dynamic() const override { return false; }
bool has_padding() const override { return false; }
ir::DataType data_type() const override { return _info.typeInfo().type(); }
float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
+ int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
+ const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
+ const std::vector<int32_t> &data_zero_points() const override
+ {
+ return _info.typeInfo().zero_points();
+ }
const ir::OperandInfo &tensorInfo() const override { return _info; }
uint64_t num_elements() const override { return _info.shape().num_elements(); };
+ ir::Shape getShape() const override;
private:
const ir::OperandInfo _info;
void releaseData() override { _buffer = nullptr; }
size_t total_size() const override { return _info.total_size(); }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
size_t calcOffset(const ir::Coordinates &coords) const override;
ir::Layout layout() const override;
bool is_dynamic() const override { return false; }
bool has_padding() const override { return false; }
ir::DataType data_type() const override { return _info.typeInfo().type(); }
float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
+ int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
+ const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
+ const std::vector<int32_t> &data_zero_points() const override
+ {
+ return _info.typeInfo().zero_points();
+ }
const ir::OperandInfo &tensorInfo() const override { return _info; }
uint64_t num_elements() const override { return _info.shape().num_elements(); };
+ ir::Shape getShape() const override;
private:
const ir::OperandInfo _info;
void prepare(ExecEnv *env, const ir::Operation &node)
{
const auto &arithmetic_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
}
auto output_info =
- ir::OperandInfo::createStaticInfo(out_shape, lhs_tensor->tensorInfo().typeInfo());
+ ir::OperandInfo::createStaticInfo(out_shape, lhs_tensor->tensorInfo().typeInfo());
// We can handle already allocated (ex. model output)
env->allocateIfNeeded(out_index, output_info);
}
raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
const auto cker_op_type =
- (op_type == OpType::ADD)
- ? nnfw::cker::BinaryArithmeticOpType::ADD
- : ((op_type == OpType::SUB) ? nnfw::cker::BinaryArithmeticOpType::SUB
- : nnfw::cker::BinaryArithmeticOpType::MUL);
+ (op_type == OpType::ADD) ? nnfw::cker::BinaryArithmeticOpType::ADD
+ : ((op_type == OpType::SUB) ? nnfw::cker::BinaryArithmeticOpType::SUB
+ : nnfw::cker::BinaryArithmeticOpType::MUL);
- const bool need_broadcast = nnfw::cker::ProcessBroadcastShapes(
- convertShape(lhs_tensor->tensorInfo().shape()),
- convertShape(rhs_tensor->tensorInfo().shape()), &cker_param);
+ const bool need_broadcast =
+ nnfw::cker::ProcessBroadcastShapes(convertShape(lhs_tensor->tensorInfo().shape()),
+ convertShape(rhs_tensor->tensorInfo().shape()), &cker_param);
if (need_broadcast)
{
void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
{
const auto &arithmetic_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
switch (arithmetic_node.param().arithmetic_type)
{
const auto first_tensor = env->tensorAt(first_index);
uint32_t out_axis_dimension = 0;
const int32_t axis_raw = concat_node.param().axis;
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->num_dimensions()) : axis_raw;
+ const int32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->getShape().rank()) : axis_raw;
// All inputs shape should be same except axis dimension
// All inputs type should be same
for (auto input : node.getInputs())
{
- assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions());
+ assert(first_tensor->getShape().rank() == env->tensorAt(input)->getShape().rank());
assert(first_tensor->data_type() == env->tensorAt(input)->data_type());
- for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
+ for (int i = 0; i < first_tensor->getShape().rank(); i++)
{
if (i == axis)
{
- out_axis_dimension += env->tensorAt(input)->dimension(i);
+ out_axis_dimension += env->tensorAt(input)->getShape().dim(i);
continue;
}
- assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i));
+ assert(first_tensor->getShape().dim(i) == env->tensorAt(input)->getShape().dim(i));
}
}
// Make output tensor info using first input tensor info, and accumulated axis dimension value
auto out_shape = first_tensor->tensorInfo().shape();
out_shape.dim(axis) = out_axis_dimension;
- env->allocateIfNeeded(out_index, ir::OperandInfo::createStaticInfo(
- out_shape, first_tensor->tensorInfo().typeInfo()));
+ env->allocateIfNeeded(
+ out_index, ir::OperandInfo::createStaticInfo(out_shape, first_tensor->tensorInfo().typeInfo()));
auto out_tensor = env->tensorAt(out_index);
UNUSED_RELEASE(out_tensor);
- // Output shape should be same with input except axis dimension
+ // Output shape should be same as input except for the axis dimension
// Output type should be same with input
assert(first_tensor->data_type() == out_tensor->data_type());
- for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
+ for (int i = 0; i < first_tensor->getShape().rank(); i++)
{
if (i == axis)
{
continue;
}
- assert(first_tensor->dimension(i) == out_tensor->dimension(i));
+ assert(first_tensor->getShape().dim(i) == out_tensor->getShape().dim(i));
}
}
const auto out_index = node.getOutputs().at(0);
const auto out_tensor = env->tensorAt(out_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw;
+ const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->getShape().rank()) : axis_raw;
const auto data_type = in_tensors[0]->data_type();
if (data_type == ir::DataType::FLOAT32)
const auto kernel_tensor = env->tensorAt(kernel_index);
const auto bias_tensor = env->tensorAt(bias_index);
- assert(in_tensor->num_dimensions() == 4);
- assert(kernel_tensor->num_dimensions() == 4);
- assert(bias_tensor->num_dimensions() == 1);
+ assert(in_tensor->getShape().rank() == 4);
+ assert(kernel_tensor->getShape().rank() == 4);
+ assert(bias_tensor->getShape().rank() == 1);
UNUSED_RELEASE(in_tensor);
UNUSED_RELEASE(kernel_tensor);
// Handle unspecified output shape
const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
const auto infered_output_shape = shape_inference::inferConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
+ in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
+ out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
}
else
{
// Handle same ifm & ofm data type only
assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
+ assert(out_tensor->getShape().rank() == 4);
}
void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
const auto &ker_shape = ker_tensor->tensorInfo().shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height);
// Calculate
float activation_min, activation_max;
const auto kernel_tensor = env->tensorAt(kernel_index);
const auto bias_tensor = env->tensorAt(bias_index);
- assert(in_tensor->num_dimensions() == 4);
- assert(kernel_tensor->num_dimensions() == 4);
- assert(bias_tensor->num_dimensions() == 1);
+ assert(in_tensor->getShape().rank() == 4);
+ assert(kernel_tensor->getShape().rank() == 4);
+ assert(bias_tensor->getShape().rank() == 1);
UNUSED_RELEASE(in_tensor);
UNUSED_RELEASE(kernel_tensor);
{
// Handle unspecified output shape
const auto &depth_conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
const auto infered_output_shape = shape_inference::inferDepthwiseConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(),
- depth_conv_node.param());
+ in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(),
+ depth_conv_node.param());
env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
+ out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
}
else
{
// Handle same ifm & ofm data type only
assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
+ assert(out_tensor->getShape().rank() == 4);
}
void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
const auto &ker_shape = ker_tensor->tensorInfo().shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height);
// Calculate
float activation_min, activation_max;
else
{
const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
act_node.param().beta);
}
void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
{
const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
switch (act_node.param().op_type)
{
case ir::operation::ElementwiseActivation::Type::LOGISTIC:
UNUSED_RELEASE(kernel_tensor);
UNUSED_RELEASE(bias_tensor);
- assert(in_tensor->num_dimensions() >= 2);
- assert(kernel_tensor->num_dimensions() == 2);
- assert(bias_tensor->num_dimensions() == 1);
+ assert(in_tensor->getShape().rank() >= 2);
+ assert(kernel_tensor->getShape().rank() == 2);
+ assert(bias_tensor->getShape().rank() == 1);
const auto input_size_with_batch = in_tensor->num_elements();
- const auto num_units = kernel_tensor->dimension(0);
- const auto input_size = kernel_tensor->dimension(1);
- const auto batch_size = input_size_with_batch / input_size;
+ const auto num_units = kernel_tensor->getShape().dim(0);
+ const auto input_size = kernel_tensor->getShape().dim(1);
+ const int32_t batch_size = input_size_with_batch / input_size;
assert(input_size_with_batch % input_size == 0);
- assert(num_units == bias_tensor->dimension(0));
+ assert(num_units == bias_tensor->getShape().dim(0));
// Make output tensor info
ir::Shape output_shape(2);
output_shape.dim(0) = batch_size;
output_shape.dim(1) = num_units;
const auto out_info =
- ir::OperandInfo::createStaticInfo(output_shape, in_tensor->tensorInfo().typeInfo());
+ ir::OperandInfo::createStaticInfo(output_shape, in_tensor->tensorInfo().typeInfo());
env->allocateIfNeeded(out_index, out_info);
auto out_tensor = env->tensorAt(out_index);
// Handle same ifm & ofm data type only
assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 2);
- assert(out_tensor->dimension(0) == batch_size);
- assert(out_tensor->dimension(1) == num_units);
+ assert(out_tensor->getShape().rank() == 2);
+ assert(out_tensor->getShape().dim(0) == batch_size);
+ assert(out_tensor->getShape().dim(1) == num_units);
}
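// Editor's illustrative arithmetic (hypothetical sizes, not part of this patch) for the
// FullyConnected output-shape derivation above: with a kernel of shape [num_units, input_size]
// and a flattened input, batch_size is inferred from the input element count.
#include <cassert>
#include <cstdint>

int main()
{
  const int32_t input_size_with_batch = 2 * 64; // e.g. a [2, 64] input tensor
  const int32_t num_units = 10;                 // kernel dim(0)
  const int32_t input_size = 64;                // kernel dim(1)
  assert(input_size_with_batch % input_size == 0);
  const int32_t batch_size = input_size_with_batch / input_size; // -> 2
  // The allocated output info then has shape [batch_size, num_units] = [2, 10]
  assert(batch_size == 2 && num_units == 10);
  return 0;
}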
void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
void invokeFC(const ExecEnv *env, const ir::Operation &node)
{
const auto &conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
}
auto output_tensor = env->tensorAt(output_index);
- auto output_rank = input_tensor->num_dimensions() + indices_tensor->num_dimensions() - 1;
+ auto output_rank = input_tensor->getShape().rank() + indices_tensor->getShape().rank() - 1;
- if (output_rank != output_tensor->num_dimensions())
+ if (output_rank != output_tensor->getShape().rank())
{
throw std::runtime_error{"Interp(Gather): Invalid output rank"};
}
input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo())
{
throw std::runtime_error{
- "Interp(Gather): Cannot handle different I/O QUANT_UINT8_ASYMM scale/offset"};
+ "Interp(Gather): Cannot handle different I/O QUANT_UINT8_ASYMM scale/offset"};
}
}
const auto input_tensor = env->tensorAt(input_index);
const auto indices_tensor = env->tensorAt(indices_index);
const auto output_tensor = env->tensorAt(output_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->num_dimensions()) : axis_raw;
+ const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->getShape().rank()) : axis_raw;
const auto data_type = input_tensor->data_type();
void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node)
{
const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
const auto output_index = node.getOutputs().at(0);
const auto input_tensor = env->tensorAt(input_index);
- if (input_tensor->num_dimensions() != 4)
+ if (input_tensor->getShape().rank() != 4)
{
throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"};
}
void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node)
{
const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA);
const auto pad_buffer = pad_tensor->bufferRO();
auto output_buffer = output_tensor->buffer();
- int32_t pad_rank = pad_tensor->dimension(0);
+ int32_t pad_rank = pad_tensor->getShape().dim(0);
const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
const auto in_tensor = env->tensorAt(in_index);
UNUSED_RELEASE(in_tensor);
- assert(in_tensor->num_dimensions() == 4);
+ assert(in_tensor->getShape().rank() == 4);
const auto output_info = env->graph().operands().at(out_index).info();
if (output_info.total_size() == 0)
{
// Handle unspecified output shape
const auto infered_output_shape =
- shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
+ shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
+ out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
}
else
{
// Handle same ifm & ofm data type only
assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
+ assert(out_tensor->getShape().rank() == 4);
}
template <typename T>
const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
const auto param = pool_node.param();
const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
+ ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
// Calculate
nnfw::cker::PoolParams cker_param;
cker_param.filter_width = param.kw;
const auto in_tensor = env->tensorAt(in_index);
UNUSED_RELEASE(in_tensor);
- assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2));
+ assert((in_tensor->getShape().rank() == 4) || (in_tensor->getShape().rank() == 2));
// Output shape should be same with input
// Output type is pre-defined in model
UNUSED_RELEASE(out_tensor);
// Check output shape is same with input
- assert(out_tensor->num_dimensions() == out_tensor->num_dimensions());
- for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++)
+ assert(in_tensor->getShape().rank() == out_tensor->getShape().rank());
+ for (int32_t i = 0; i < in_tensor->getShape().rank(); i++)
{
- assert(in_tensor->dimension(i) == out_tensor->dimension(i));
+ assert(in_tensor->getShape().dim(i) == out_tensor->getShape().dim(i));
}
}
float beta = param.beta;
- if (in_tensor->num_dimensions() == 2)
+ if (in_tensor->getShape().rank() == 2)
{
- uint32_t batch_size = in_tensor->dimension(0);
- uint32_t input_size = in_tensor->dimension(1);
+ uint32_t batch_size = in_tensor->getShape().dim(0);
+ uint32_t input_size = in_tensor->getShape().dim(1);
nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
}
- else if (in_tensor->num_dimensions() == 4)
+ else if (in_tensor->getShape().rank() == 4)
{
const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
const auto ker_tensor = env->tensorAt(ker_index);
const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index);
- assert(ifm_tensor->num_dimensions() == 4);
- assert(ker_tensor->num_dimensions() == 4);
- assert(ofm_shape_tensor->num_dimensions() == 1);
+ assert(ifm_tensor->getShape().rank() == 4);
+ assert(ker_tensor->getShape().rank() == 4);
+ assert(ofm_shape_tensor->getShape().rank() == 1);
UNUSED_RELEASE(ifm_tensor);
UNUSED_RELEASE(ker_tensor);
throw std::runtime_error{"Interp(TConv): Different I/O data dype"};
}
- if (ofm_tensor->num_dimensions() != 4)
+ if (ofm_tensor->getShape().rank() != 4)
{
throw std::runtime_error{"Interp(TConv): Invalid output rank"};
}
const auto ker_shape = ker_tensor->tensorInfo().shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride, ker_width, ker_height);
nnfw::cker::TransposeConvParams cker_param;
cker_param.padding_values.width = padding.left;
void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
{
const auto &tconv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
+ nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
#include "OperationValidator.h"
#include <algorithm>
+
#include <bitset>
#include <sstream>
#include "util/logging.h"
+#include "util/Set.h"
#include "verifier/Verifier.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/operand/LowerInfo.h"
-#include "ir/operand/PermuteFactor.h"
#include "ir/OperandIndexMap.h"
-#include "ir/GraphIterator.h"
+#include "ir/OperationIndexMap.h"
+#include "dumper/text/GraphDumper.h"
#include "backend/IConfig.h"
namespace onert
return _operands.emplace(shape, type);
}
-OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&node)
+OperandIndex Graph::addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand)
+{
+ return _operands.push(std::move(operand), index);
+}
+
+bool Graph::checkOperandsForOperation(const Operation &operation)
+{
+ auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ for (auto input : inputs)
+ if (!operands().exist(input))
+ return false;
+ for (auto input : outputs)
+ if (!operands().exist(input))
+ return false;
+ return true;
+}
+
+void Graph::linkOperandToOperation(OperationIndex index, const Operation &operation)
{
- assert(isBuildingPhase());
- return _operations.push(std::move(node));
+ auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ for (auto input : inputs)
+ operands().at(input).insertUse(index);
+ for (auto output : outputs)
+ operands().at(output).setDef(index);
+}
+
+OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&operation)
+{
+ const Operation &op_ref = *operation;
+ if (!checkOperandsForOperation(op_ref))
+ return OperationIndex{};
+ auto ind = _operations.push(std::move(operation));
+ if (ind.valid())
+ linkOperandToOperation(ind, op_ref);
+ return ind;
+}
+
+OperationIndex Graph::addOperation(OperationIndex index, std::unique_ptr<Operation> &&operation)
+{
+ const Operation &op_ref = *operation;
+ if (!checkOperandsForOperation(op_ref))
+ return OperationIndex{};
+ auto ind_gen = _operations.push(std::move(operation), index);
+ if (ind_gen.valid())
+ {
+ assert(ind_gen == index);
+ linkOperandToOperation(index, op_ref);
+ }
+ return index;
}
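// Editor's standalone sketch (simplified containers, not the onert API) of the
// "validate operands, then push the node and wire use/def links" flow that the new
// Graph::addOperation overloads above implement. All names here are illustrative.
#include <cassert>
#include <cstdint>
#include <set>
#include <unordered_map>
#include <vector>

using OperandIndex = uint32_t;
using OperationIndex = uint32_t;

struct Operand
{
  std::set<OperationIndex> uses;
  OperationIndex def = UINT32_MAX;
};

struct Operation
{
  std::vector<OperandIndex> inputs;
  std::vector<OperandIndex> outputs;
};

struct MiniGraph
{
  std::unordered_map<OperandIndex, Operand> operands;
  std::unordered_map<OperationIndex, Operation> operations;
  OperationIndex next_op = 0;

  bool operandsExist(const Operation &op) const
  {
    for (auto in : op.inputs)
      if (!operands.count(in))
        return false;
    for (auto out : op.outputs)
      if (!operands.count(out))
        return false;
    return true;
  }

  // Returns UINT32_MAX (an "invalid" index) when an operand is missing,
  // mirroring the early return of an invalid OperationIndex above.
  OperationIndex addOperation(Operation op)
  {
    if (!operandsExist(op))
      return UINT32_MAX;
    const OperationIndex ind = next_op++;
    for (auto in : op.inputs)
      operands[in].uses.insert(ind); // record each input's user
    for (auto out : op.outputs)
      operands[out].def = ind;       // record each output's defining op
    operations.emplace(ind, std::move(op));
    return ind;
  }
};

int main()
{
  MiniGraph g;
  g.operands[0] = {};
  g.operands[1] = {};
  assert(g.addOperation({{0}, {1}}) == 0);          // valid: use/def links are set
  assert(g.addOperation({{2}, {1}}) == UINT32_MAX); // rejected: operand 2 does not exist
  assert(g.operands[0].uses.count(0) == 1);
  assert(g.operands[1].def == 0);
  return 0;
}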
void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data)
{
- assert(isBuildingPhase());
assert(_operands.exist(ind));
_operands.at(ind).data(std::move(data));
}
void Graph::addInput(const OperandIndex &ind, const std::string &name)
{
- assert(isBuildingPhase());
if (!name.empty())
_name_to_input.emplace(name, IOIndex{_inputs.size()});
_inputs.append(ind);
void Graph::addOutput(const OperandIndex &ind, const std::string &name)
{
- assert(isBuildingPhase());
if (!name.empty())
_name_to_output.emplace(name, IOIndex{_outputs.size()});
_outputs.append(ind);
return (itr == _name_to_output.end()) ? IOIndex{} : itr->second;
}
-void Graph::finishBuilding(void)
+void Graph::verify(void)
{
- assert(isBuildingPhase());
- _phase = Phase::MODEL;
-
- initializeUseDef();
- sweepGarbageOperands();
-
// Call graph verifications for the MODEL phase
{
// Except for edge consistency, the user might have been given a bad model
throw std::runtime_error{"One of model input and output operands does not exist."};
if (!verifier::DAGChecker().verify(*this))
throw std::runtime_error{"The graph is cyclic."};
- assert(verifier::EdgeConsistencyChecker().verify(*this));
+ assert(verifier::EdgeChecker().verify(*this));
}
// Check shape independent operation feature
});
}
-void Graph::sweepGarbageOperands()
+std::vector<ir::OperationIndex> Graph::topolSortOperations() const
{
- // Remove operands that are not used by any operations, except Graph inputs/outputs
- ir::OperandIndexMap<bool> visited;
-
- operations().iterate([&](const OperationIndex &, const Operation &node) {
- for (auto ind : node.getInputs() + node.getOutputs())
+ std::vector<ir::OperationIndex> ret;
+ util::Set<ir::OperationIndex> unvisited;
+ operations().iterate(
+ [&](const ir::OperationIndex &index, const ir::Operation &) { unvisited.add(index); });
+
+ std::function<void(const ir::OperationIndex &, const ir::Operation &)> dfs =
+ [&](const ir::OperationIndex &index, const ir::Operation &op) -> void {
+ if (!unvisited.contains(index))
+ return;
+ unvisited.remove(index);
+
+ for (const auto output : op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
- visited[ind] = true;
+ const auto &operand = operands().at(output);
+ for (const auto &use : operand.getUses())
+ {
+ dfs(use, operations().at(use));
+ }
}
- });
-
- // Graph's inputs/outputs are always reachable
- for (auto ind : getInputs() + getOutputs())
- {
- visited[ind] = true;
- }
-
- operands().iterate([&](const OperandIndex &ind, const Operand &) {
- if (!visited[ind])
- {
- VERBOSE(Graph::sweepGarbageOperands) << "Sweep garbage operand " << ind.value() << std::endl;
- operands().remove(ind);
- }
- });
+ ret.push_back(index);
+ };
+ operations().iterate(dfs);
+
+ assert(unvisited.empty()); // All of the nodes must have been visited
+ // Reversing Postorder DFS result to make it sorted in topological order
+ std::reverse(ret.begin(), ret.end());
+ return ret;
}
} // namespace ir
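// Editor's standalone sketch (plain adjacency lists, not the onert API) of the
// post-order DFS + reverse that topolSortOperations() above relies on.
#include <algorithm>
#include <cassert>
#include <functional>
#include <vector>

std::vector<int> topolSort(const std::vector<std::vector<int>> &successors)
{
  std::vector<bool> visited(successors.size(), false);
  std::vector<int> ret;
  std::function<void(int)> dfs = [&](int n) {
    if (visited[n])
      return;
    visited[n] = true;
    for (int succ : successors[n])
      dfs(succ);
    ret.push_back(n); // post-order: a node is emitted only after all of its users
  };
  for (int n = 0; n < static_cast<int>(successors.size()); ++n)
    dfs(n);
  std::reverse(ret.begin(), ret.end()); // reverse post-order == topological order
  return ret;
}

int main()
{
  // 0 -> 1 -> 2 and 0 -> 2
  const std::vector<std::vector<int>> g{{1, 2}, {2}, {}};
  const auto order = topolSort(g);
  assert((order == std::vector<int>{0, 1, 2}));
  return 0;
}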
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GraphIterator.h"
-
-#include "ir/OperationIndexMap.h"
-#include "compiler/LoweredGraph.h"
-
-namespace onert
-{
-namespace ir
-{
-
-//
-// Graph::DefaultIterator
-//
-
-template <bool is_const>
-void DefaultIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
-{
- graph.operations().iterate(
- [&](const OperationIndex &index, NodeRef node) -> void { fn(index, node); });
-}
-
-//
-// Graph::PostDfsIterator
-//
-
-template <bool is_const>
-void PostDfsIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
-{
- assert(!graph.isBuildingPhase()); // Restrict iteration condition
-
- OperationIndexMap<bool> visited;
- graph.operations().iterate([&](const OperationIndex &index, NodeRef) { visited[index] = false; });
-
- std::function<void(const OperationIndex &, NodeRef)> dfs_recursive =
- [&](const OperationIndex &index, NodeRef node) -> void {
- if (visited[index])
- return;
- visited[index] = true;
-
- for (const auto output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
- {
- const auto &operand = graph.operands().at(output);
- for (const auto &use : operand.getUses())
- {
- dfs_recursive(use, graph.operations().at(use));
- }
- }
-
- fn(index, node);
- };
-
- graph.operations().iterate(dfs_recursive);
-
- // All of the operations(nodes) must have been visited.
- assert(std::all_of(visited.begin(), visited.end(),
- [](const std::pair<const OperationIndex, bool> &v) { return v.second; }));
-}
-
-template <bool is_const>
-void PostDfsIterator<is_const>::iterateOpSeqs(LoweredGraphRef lowered_graph,
- const OpSeqIterFn &fn) const
-{
- std::unordered_map<OpSequenceIndex, bool> visited;
- lowered_graph.op_seqs().iterate(
- [&](const OpSequenceIndex &index, OpSequenceRef) { visited[index] = false; });
-
- std::function<void(const OpSequenceIndex &, OpSequenceRef)> dfs_recursive =
- [&](const OpSequenceIndex &index, OpSequenceRef op_seq) -> void {
- if (visited[index])
- return;
- visited[index] = true;
-
- for (const auto output : op_seq.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
- {
- const auto &operand = lowered_graph.graph().operands().at(output);
- for (const auto &use : operand.getUses())
- {
- const auto use_op_seq_index = lowered_graph.op_seqs().getOperation(use);
- dfs_recursive(use_op_seq_index, lowered_graph.op_seqs().at(use_op_seq_index));
- }
- }
-
- fn(index, op_seq);
- };
-
- lowered_graph.op_seqs().iterate(dfs_recursive);
-
- // All of the operations(nodes) must have been visited.
- assert(std::all_of(visited.begin(), visited.end(),
- [](const std::pair<const OpSequenceIndex, bool> &v) { return v.second; }));
-}
-
-// Explicit instantiations to have implementation in the source file.
-// NOTE If these instatiations were in the top of this file, `iterate` is compiled and saved in
-// `GraphIterator.cc.o` but `iterateOpSeqs`. This happens only when cross-building for Android.
-// (Maybe a bug of NDK toolchain(clang)?)
-
-template class DefaultIterator<true>;
-template class DefaultIterator<false>;
-
-template class PostDfsIterator<true>;
-template class PostDfsIterator<false>;
-
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_GRAPH_ITERATOR_H__
-#define __ONERT_IR_GRAPH_ITERATOR_H__
-
-#include <type_traits>
-
-#include "ir/Index.h"
-
-namespace onert
-{
-namespace compiler
-{
-class LoweredGraph;
-} // namespace compiler
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-
-class Graph;
-class Operation;
-class OpSequence;
-
-template <bool is_const> class Iterator
-{
-public:
- using GraphRef = typename std::conditional<is_const, const Graph &, Graph &>::type;
- using IndexRef = const OperationIndex &;
- using NodeRef = typename std::conditional<is_const, const Operation &, Operation &>::type;
- using IterFn = std::function<void(IndexRef, NodeRef)>;
-
-public:
- virtual ~Iterator() = default;
- virtual void iterate(GraphRef graph, const IterFn &fn) const = 0;
-};
-
-template <bool is_const = false> class DefaultIterator final : public Iterator<is_const>
-{
-public:
- using GraphRef = typename Iterator<is_const>::GraphRef;
- using IndexRef = typename Iterator<is_const>::IndexRef;
- using NodeRef = typename Iterator<is_const>::NodeRef;
- using IterFn = typename Iterator<is_const>::IterFn;
-
-public:
- void iterate(GraphRef graph, const IterFn &fn) const;
-};
-using DefaultConstIterator = DefaultIterator<true>;
-
-template <bool is_const = false> class PostDfsIterator final : public Iterator<is_const>
-{
-public:
- using GraphRef = typename Iterator<is_const>::GraphRef;
- using IndexRef = typename Iterator<is_const>::IndexRef;
- using NodeRef = typename Iterator<is_const>::NodeRef;
- using IterFn = typename Iterator<is_const>::IterFn;
- using LoweredGraphRef =
- typename std::conditional<is_const, const typename compiler::LoweredGraph &,
- typename compiler::LoweredGraph &>::type;
- using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type;
- using OpSeqIndexRef = const OpSequenceIndex &;
- using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>;
-
-public:
- void iterate(GraphRef graph, const IterFn &fn) const;
- void iterateOpSeqs(LoweredGraphRef lowered_graph, const OpSeqIterFn &f) const;
-};
-using PostDfsConstIterator = PostDfsIterator<true>;
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_GRAPH_ITERATOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OpSequence.h"
-
-#include "ir/Operations.h"
-#include "ir/OperationVisitor.h"
-#include <sstream>
-
-namespace
-{
-
-std::string getStrFromIndice(const onert::ir::OperandIndexSequence &indice)
-{
- std::string str;
- for (const auto &ind : indice)
- {
- str += std::to_string(ind.value());
- str.push_back(',');
- }
- if (str.back() == ',')
- str.pop_back();
-
- return str;
-}
-}
-
-namespace onert
-{
-namespace ir
-{
-
-OpSequence::OpSequence(Layout layout) : _layout{layout}, _has_dynamic_tensor{false}
-{
- // DO NOTHING
-}
-
-void OpSequence::accept(OperationVisitor &v) const { v.visit(*this); }
-
-// TODO: Impl Dumper instead of this method
-std::string getStrFromOpSeq(const OpSequence &op_seq, const Operations &operations)
-{
- // " OpSequence IN(0,1,2) -> { op0(0,1,2:3), op1(3:4), op2(4:5) } -> OUT(5)"
- std::stringstream ss;
- ss << " OpSequence IN(" << getStrFromIndice(op_seq.getInputs()) << ") -> {";
- for (const auto &op_idx : op_seq)
- {
- ss << " " << op_idx.value() << "(" << operations.at(op_idx).name() << ":"
- << getStrFromIndice(operations.at(op_idx).getInputs()) << ":"
- << getStrFromIndice(operations.at(op_idx).getOutputs()) << ")";
- }
- ss << " } -> OUT(" << getStrFromIndice(op_seq.getOutputs()) << ")";
- return ss.str();
-}
-
-void OpSequence::remove(const OperationIndex &index)
-{
- assert(exist(index));
- for (auto it = _operations.cbegin(); it != _operations.cend(); ++it)
- {
- if (*it == index)
- {
- _operations.erase(it);
- break;
- }
- }
-}
-
-bool OpSequence::exist(const OperationIndex &index) const
-{
- for (const auto &inner_op_idx : _operations)
- {
- if (inner_op_idx == index)
- {
- return true;
- }
- }
- return false;
-}
-
-} // namespace ir
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OpSequences.h"
-#include "util/logging.h"
-#include <memory>
-
-#include <cassert>
-#include <string>
-
-namespace onert
-{
-namespace ir
-{
-
-OpSequenceIndex OpSequences::emplace(const OperationIndex &index, Layout layout)
-{
- std::unique_ptr<OpSequence> op_seq = std::make_unique<OpSequence>(layout);
- op_seq->appendOperation(index);
- const OpSequenceIndex &seq_index = push(std::move(op_seq));
- cacheSequenceIndex(seq_index, index);
- return seq_index;
-}
-
-OpSequenceIndex OpSequences::emplace(std::unique_ptr<OpSequence> &&op_seq)
-{
- auto &operations = op_seq->operations();
- const OpSequenceIndex &seq_index = push(std::move(op_seq));
- for (const auto &op_idx : operations)
- {
- cacheSequenceIndex(seq_index, op_idx);
- }
- return seq_index;
-}
-
-void OpSequences::cacheSequenceIndex(const OpSequenceIndex &seq_index,
- const OperationIndex &op_index) const
-{
- _seq_indexes.emplace(op_index, seq_index);
-}
-
-OpSequenceIndex *OpSequences::findSequenceIndex(const OperationIndex &operation_index) const
-{
- // If opration_index is cached, return sequence_index from cache
- if (_seq_indexes.count(operation_index))
- {
- auto &op_seq_index = _seq_indexes.at(operation_index);
- if (_objects.count(op_seq_index) && _objects.at(op_seq_index)->exist(operation_index))
- {
- return &op_seq_index;
- }
- else
- {
- _seq_indexes.erase(operation_index);
- return nullptr;
- }
- }
- return nullptr;
-}
-
-bool OpSequences::containsOperation(const OperationIndex &operation_index) const
-{
- return findOperation(operation_index).valid();
-}
-
-OpSequenceIndex OpSequences::getOperation(const OperationIndex &operation_index) const
-{
- OpSequenceIndex ret = findOperation(operation_index);
- assert(ret.valid());
- return ret;
-}
-
-void OpSequences::removeFromOpSequence(const OperationIndex &operation_index)
-{
- const auto op_seq_index = findOperation(operation_index);
- auto &op_seq = at(op_seq_index);
- _seq_indexes.erase(operation_index);
- op_seq.remove(operation_index);
- if (op_seq.size() == 0)
- {
- remove(op_seq_index);
- }
-}
-
-OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index) const
-{
- if (OpSequenceIndex *op_seq_index = findSequenceIndex(operation_index))
- return *op_seq_index;
-
- for (auto &e : _objects)
- {
- OpSequence &object = *e.second;
- auto it = find(object.operations().begin(), object.operations().end(), operation_index);
- if (it != object.operations().end())
- {
- cacheSequenceIndex(e.first, operation_index);
- return e.first;
- }
- }
- throw std::runtime_error("Operation not found");
-}
-
-void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations)
-{
- op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
- VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
- });
-}
-
-} // namespace ir
-} // namespace onert
void Operand::unsetDef() { _def = OperationIndex{}; }
+void Operand::clearDefUse()
+{
+ unsetDef();
+ _uses.clear();
+}
+
} // namespace ir
} // namespace onert
return ret;
}
-std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &op_seq)
+std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &operand_seq)
{
std::string delimeter;
- for (const auto &ind : op_seq._vec)
+ for (const auto &ind : operand_seq._vec)
{
o << delimeter << ind;
delimeter = ',';
obj.iterate([&](const OperandIndex &index, const Operand &operand) {
_objects.emplace(index, std::make_unique<Operand>(operand));
});
- _index_count = obj._index_count;
+ _next_index = obj._next_index;
}
} // namespace ir
Operation::Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, OperandConstraint output_constr)
- : _input_constr{input_constr}, _output_constr{output_constr}
+ : _input_constr{input_constr}, _output_constr{output_constr}
{
setInputs(inputs);
setOutputs(outputs);
}
Operation::Operation(OperandConstraint input_constr, OperandConstraint output_constr)
- : _input_constr{input_constr}, _output_constr{output_constr}
+ : _input_constr{input_constr}, _output_constr{output_constr}
{
}
namespace ir
{
+namespace
+{
+
+class OperationCloner : public OperationVisitor
+{
+public:
+#define OP(Name) void visit(const operation::Name &o) override;
+#include "ir/Operations.lst"
+#undef OP
+
+public:
+ std::unique_ptr<Operation> releaseClone();
+
+private:
+ std::unique_ptr<Operation> _return_op;
+};
+
#define OP(Name) \
void OperationCloner::visit(const operation::Name &o) \
{ \
return std::move(_return_op);
}
+} // namespace
+
+std::unique_ptr<Operation> clone(const Operation &operation)
+{
+ OperationCloner cloner;
+ operation.accept(cloner);
+ return cloner.releaseClone();
+}
+
} // namespace ir
} // namespace onert
namespace ir
{
-class OperationCloner : public OperationVisitor
-{
-public:
-#define OP(Name) void visit(const operation::Name &o) override;
-#include "ir/Operations.lst"
-#undef OP
-
-public:
- std::unique_ptr<Operation> releaseClone();
-
-private:
- std::unique_ptr<Operation> _return_op;
-};
+std::unique_ptr<Operation> clone(const Operation &operation);
} // namespace ir
} // namespace onert
void OperationDumper::visit(const BatchToSpaceND &node)
{
std::string block_size =
- "BlockSize(" +
- std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")";
+ "BlockSize(" + std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) +
+ ")";
dumpUnaryInputOp(node, block_size);
}
void OperationDumper::visit(const Conv2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
dumpConvOp(node, padding_type);
}
void OperationDumper::visit(const DepthwiseConv2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
dumpConvOp(node, padding_type);
}
void OperationDumper::visit(const ExpandDims &node)
{
std::string axis =
- "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
+ "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
dumpUnaryInputOp(node, axis);
}
void OperationDumper::visit(const FullyConnected &node)
{
std::string inputs =
- "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
- ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
+ "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
+ ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
dumpUnaryInputOp(node, inputs);
}
void OperationDumper::visit(const Gather &node)
{
std::string indices =
- "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
+ "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
dumpUnaryInputOp(node, indices);
}
void OperationDumper::visit(const InstanceNorm &node)
{
std::string inputs =
- "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) +
- ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
+ "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) + ") Beta(" +
+ std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
dumpUnaryInputOp(node, inputs);
}
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR)
- << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT)
- << ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS)
- << ") Input To Forget Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS)
- << ") Input To Cell Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS)
- << ") Input To Output Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)
- << ") Recurrent To Input Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)
- << ") Recurrent To Forget Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)
- << ") Recurrent To Cell Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)
- << ") Recurrent To Output Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS) << ") Cell To Input Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS) << ") Cell To Forget Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS) << ") Cell To OUTPUT Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS) << ") Input Gate Bias("
- << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS) << ") Forget Gate Bias("
- << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS) << ") Cell Bias("
- << node.getInputs().at(LSTM::Input::CELL_BIAS) << ") Output Gate Bias("
- << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS) << ") Projection Weights("
- << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias("
- << node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In("
- << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In("
- << node.getInputs().at(LSTM::Input::CELL_STATE_IN);
+ << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT)
+ << ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS)
+ << ") Input To Forget Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS)
+ << ") Input To Cell Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS)
+ << ") Input To Output Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)
+ << ") Recurrent To Input Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)
+ << ") Recurrent To Forget Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)
+ << ") Recurrent To Cell Weights(" << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)
+ << ") Recurrent To Output Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS) << ") Cell To Input Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS) << ") Cell To Forget Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS) << ") Cell To OUTPUT Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS) << ") Input Gate Bias("
+ << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS) << ") Forget Gate Bias("
+ << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS) << ") Cell Bias("
+ << node.getInputs().at(LSTM::Input::CELL_BIAS) << ") Output Gate Bias("
+ << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS) << ") Projection Weights("
+ << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias("
+ << node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In("
+ << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In("
+ << node.getInputs().at(LSTM::Input::CELL_STATE_IN);
if (node.getInputs().size() == 24)
{
VERBOSE(LIR) << ") Input Layer Normalization Weights("
void OperationDumper::visit(const Pool2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")"
<< std::endl;
void OperationDumper::visit(const PReLU &node)
{
std::string alpha =
- "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
+ "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
dumpUnaryInputOp(node, alpha);
}
{
// optional param
std::string shape =
- node.getInputs().size() == 2
- ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
- : "Shape(not provided)";
+ node.getInputs().size() == 2
+ ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
+ : "Shape(not provided)";
dumpUnaryInputOp(node, shape);
}
void OperationDumper::visit(const Reverse &node)
{
std::string axis =
- "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
+ "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
dumpUnaryInputOp(node, axis);
}
void OperationDumper::visit(const SpaceToBatchND &node)
{
std::string inputs =
- "BlockSize(" +
- std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
- ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
- ")";
+ "BlockSize(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
+ ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
+ ")";
dumpUnaryInputOp(node, inputs);
}
void OperationDumper::visit(const Tile &node)
{
std::string multiples =
- "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
+ "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
dumpUnaryInputOp(node, multiples);
}
void OperationDumper::visit(const TransposeConv &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
VERBOSE(LIR) << "* TransposeConv(" << padding_type << ")" << std::endl;
VERBOSE(LIR) << " - Inputs : Output Shape("
<< node.getInputs().at(TransposeConv::Input::OUTPUT_SHAPE) << ") KERNEL("
#include "OperationValidator.h"
#include "ir/Graph.h"
+#include "util/logging.h"
#define OP_REQUIRES(EXP) \
do \
{
OperationValidator::OperationValidator(const Graph &graph)
- : _operations{graph.operations()}, _operands{graph.operands()}
+ : _operations{graph.operations()}, _operands{graph.operands()}
{
}
if (_operands.at(idx1).typeInfo().scale() != _operands.at(idx2).typeInfo().scale())
return false;
- if (_operands.at(idx1).typeInfo().offset() != _operands.at(idx2).typeInfo().offset())
+ if (_operands.at(idx1).typeInfo().zero_point() != _operands.at(idx2).typeInfo().zero_point())
return false;
return true;
void OperationValidator::visit(const operation::Conv2D &node)
{
const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)};
+ const auto kernel_index{node.getInputs().at(operation::Conv2D::Input::KERNEL)};
const auto output_index{node.getOutputs().at(0)};
uint32_t stride_horizontal = node.param().stride.horizontal;
OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM)
+ {
+ for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points())
+ OP_REQUIRES(zeropoint == 0);
+ }
}
void OperationValidator::visit(const operation::DepthToSpace &node)
void OperationValidator::visit(const operation::DepthwiseConv2D &node)
{
const auto input_index{node.getInputs().at(operation::DepthwiseConv2D::Input::INPUT)};
+ const auto kernel_index{node.getInputs().at(operation::DepthwiseConv2D::Input::KERNEL)};
const auto output_index{node.getOutputs().at(0)};
uint32_t stride_horizontal = node.param().stride.horizontal;
OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM)
+ {
+ for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points())
+ OP_REQUIRES(zeropoint == 0);
+ }
}
void OperationValidator::visit(const operation::ElementwiseActivation &node)
break;
case operation::ElementwiseActivation::Type::LEAKY_RELU:
OP_REQUIRES(
- isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
- DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
break;
case operation::ElementwiseActivation::Type::LOGISTIC:
OP_REQUIRES(
- isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
- DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
break;
case operation::ElementwiseActivation::Type::RELU:
- OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
- DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isValidType(
+ input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
break;
case operation::ElementwiseActivation::Type::TANH:
OP_REQUIRES(
- isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
- DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
break;
}
}
}
else if (node.param().op_type == operation::ElementwiseUnary::Type::QUANTIZE)
{
- OP_REQUIRES(isValidType(input_index, DataType::FLOAT32));
- OP_REQUIRES(isValidType(output_index, DataType::QUANT_UINT8_ASYMM));
+ OP_REQUIRES(isValidType(
+ input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(
+ isValidType(output_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
}
else if (node.param().op_type == operation::ElementwiseUnary::Type::FLOOR)
{
// TFLite: Allow hybrid type - value table & output
// NNAPI: Require same value table and output type
OP_REQUIRES(
- isSameType(values_index, output_index) ||
- (isValidType(output_index, DataType::FLOAT32) &&
- (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM}))));
+ isSameType(values_index, output_index) ||
+ (isValidType(output_index, DataType::FLOAT32) &&
+ (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM}))));
}
void OperationValidator::visit(const operation::ExpandDims &node)
void OperationValidator::visit(const operation::Pad &node)
{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Pad::Input::INPUT)};
const auto pad_index{node.getInputs().at(operation::Pad::Input::PAD)};
+ bool isQuantType =
+ isValidType(output_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM});
+ bool isPadV2 = node.getInputs().size() == 3 ? true : false;
OP_REQUIRES(isValidType(pad_index, DataType::INT32));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isQuantType)
+ OP_REQUIRES(isSameQuantParam(input_index, output_index));
+
+ if (isPadV2)
+ {
+ const auto value_index{node.getInputs().at(operation::Pad::Input::VALUE)};
+ const bool cond_same = isSameType(input_index, value_index);
+ const bool cond_same_quant = (!isQuantType || isSameQuantParam(input_index, value_index));
+ const auto input_t = operandType(input_index);
+ const auto value_t = operandType(value_index);
+ // NNAPI accepts this case. scale and zeroPoint are assumed to be the same as in input0.
+ const bool cond_quant8 =
+ ((input_t == DataType::QUANT_UINT8_ASYMM || input_t == DataType::QUANT_INT8_ASYMM) &&
+ value_t == DataType::INT32);
+ OP_REQUIRES((cond_same && cond_same_quant) || cond_quant8);
+ }
}
void OperationValidator::visit(const operation::Rank &node)
OP_REQUIRES(isValidType(output_index, {DataType::UINT32, DataType::INT32, DataType::INT64}));
}
+void OperationValidator::visit(const operation::Slice &node)
+{
+ const auto begins_index{node.getInputs().at(operation::Slice::BEGINS)};
+ const auto sizes_index{node.getInputs().at(operation::Slice::SIZES)};
+
+ OP_REQUIRES(isValidType(begins_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isSameType(begins_index, sizes_index));
+}
+
+void OperationValidator::visit(const operation::Softmax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Softmax::INPUT)};
+
+ OP_REQUIRES(isSameType(input_index, output_index));
+ OP_REQUIRES(isValidType(
+ output_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+}
+
void OperationValidator::visit(const operation::SpaceToBatchND &node)
{
const auto block_size_index{node.getInputs().at(operation::SpaceToBatchND::Input::BLOCK_SIZE)};
OP_REQUIRES(isSameType(lhs_index, rhs_index));
}
+void OperationValidator::visit(const operation::StatelessRandomUniform &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto shape_index{node.getInputs().at(operation::StatelessRandomUniform::Input::SHAPE)};
+ const auto seed_index{node.getInputs().at(operation::StatelessRandomUniform::Input::SEED)};
+
+ OP_REQUIRES(isValidType(output_index, DataType::FLOAT32));
+ OP_REQUIRES(isValidType(shape_index, DataType::INT32));
+ OP_REQUIRES(isValidType(seed_index, DataType::INT32));
+}
+
void OperationValidator::visit(const operation::StridedSlice &node)
{
const auto output_index{node.getOutputs().at(0)};
OP_REQUIRES(node.getInputs().size() == node.getOutputs().size());
}
-} // namespace compiler
+} // namespace ir
} // namespace onert
#define __ONERT_IR_OPERATION_VALIDATOR_H__
#include "ir/OperationVisitor.h"
+#include "ir/Operations.h"
+#include "ir/Operands.h"
namespace onert
{
void visit(const operation::Reverse &node) override;
void visit(const operation::Select &node) override;
void visit(const operation::Shape &node) override;
+ void visit(const operation::Slice &node) override;
+ void visit(const operation::Softmax &node) override;
void visit(const operation::SpaceToBatchND &node) override;
void visit(const operation::SpaceToDepth &node) override;
void visit(const operation::Split &node) override;
void visit(const operation::SquaredDifference &node) override;
+ void visit(const operation::StatelessRandomUniform &node) override;
void visit(const operation::StridedSlice &node) override;
void visit(const operation::TransposeConv &node) override;
void visit(const operation::Unpack &node) override;
Operations::Operations(const Operations &obj)
{
- obj.iterate([&](const OperationIndex &index, const Operation &op) {
- OperationCloner cloner;
- op.accept(cloner);
- _objects.emplace(index, cloner.releaseClone());
- });
- _index_count = obj._index_count;
+ obj.iterate(
+ [&](const OperationIndex &index, const Operation &op) { _objects.emplace(index, clone(op)); });
+ _next_index = obj._next_index;
}
} // namespace ir
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
- (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+ (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
const int32_t vertical_needed_input =
- (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
+ (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
const int32_t horizontal_needed_input =
- (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
+ (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
padding.top = vertical_total_padding / 2;
{
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
- (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+ (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
assert(vertical_expected_output == ofm_shape.H);
assert(horizontal_expected_output == ofm_shape.W);
}
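// Editor's worked example (hypothetical sizes, not part of this patch) of the SAME-padding
// arithmetic above: expected_output = ceil(ifm / stride), needed_input =
// (expected_output - 1) * stride + filter, total_padding = max(0, needed_input - ifm).
#include <algorithm>
#include <cassert>

int main()
{
  const int ifm_h = 224, stride = 2, filter_h = 3;
  const int expected_output = (ifm_h + stride - 1) / stride;          // 112
  const int needed_input = (expected_output - 1) * stride + filter_h; // 225
  const int total_padding = std::max(0, needed_input - ifm_h);        // 1
  const int pad_top = total_padding / 2;                              // 0
  // The remainder is assumed to go to the bottom edge, as is conventional for SAME padding.
  const int pad_bottom = total_padding - pad_top;                     // 1
  assert(expected_output == 112 && total_padding == 1 && pad_top == 0 && pad_bottom == 1);
  return 0;
}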
Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
- : type{PaddingType::EXPLICIT}, param{left, right, top, bottom}
+ : type{PaddingType::EXPLICIT}, param{left, right, top, bottom}
{
// DO NOTHING
}
std::multiplies<uint64_t>());
}
-Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout)
+Shape permuteShape(const Shape &shape, Layout from, Layout to)
{
assert(shape.rank() <= Shape::MAX_RANK);
- Shape backend_shape{shape};
- if (shape.rank() >= 4 && frontend_layout == Layout::NHWC && backend_layout == Layout::NCHW)
+ Shape ret{shape};
+ if (from == to)
+ return ret;
+ if (shape.rank() < 4)
+ return ret;
+ // Permutation changing layout beyond 4-D is not supported yet
+ assert(shape.rank() <= 4);
+ if (from == Layout::NHWC && to == Layout::NCHW)
{
- // Permutation changing layout beyond 4-D is not supported yet
- assert(shape.rank() <= 4);
- backend_shape.dim(1) = shape.dim(3);
- backend_shape.dim(2) = shape.dim(1);
- backend_shape.dim(3) = shape.dim(2);
+ ret.dim(1) = shape.dim(3);
+ ret.dim(2) = shape.dim(1);
+ ret.dim(3) = shape.dim(2);
}
- else if (shape.rank() >= 4 && frontend_layout == Layout::NCHW && backend_layout == Layout::NHWC)
+ else if (from == Layout::NCHW && to == Layout::NHWC)
{
- // Permutation changing layout beyond 4-D is not supported yet
- assert(shape.rank() <= 4);
- backend_shape.dim(1) = shape.dim(2);
- backend_shape.dim(2) = shape.dim(3);
- backend_shape.dim(3) = shape.dim(1);
+ ret.dim(1) = shape.dim(2);
+ ret.dim(2) = shape.dim(3);
+ ret.dim(3) = shape.dim(1);
}
- return backend_shape;
+ // Other cases (either `from` or `to` is UNKNOWN): just return the original shape
+ return ret;
}
} // namespace ir
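// Editor's minimal sketch (plain arrays, not the onert Shape class) of the NHWC -> NCHW
// dimension permutation performed by permuteShape() above, with hypothetical sizes.
#include <array>
#include <cassert>

int main()
{
  const std::array<int, 4> nhwc{1, 224, 224, 3}; // N, H, W, C
  std::array<int, 4> nchw = nhwc;                // dim(0) (N) stays in place
  nchw[1] = nhwc[3];                             // C
  nchw[2] = nhwc[1];                             // H
  nchw[3] = nhwc[2];                             // W
  assert((nchw == std::array<int, 4>{1, 3, 224, 224}));
  return 0;
}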
return false;
}
- if (lhs.offset() != rhs.offset())
+ if (lhs.zero_point() != rhs.zero_point())
{
return false;
}
void AddN::accept(OperationVisitor &v) const { v.visit(*this); }
AddN::AddN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(inputs.size()), inputs, outputs}
+ : Operation{OperandConstraint::createExact(inputs.size()), inputs, outputs}
{
}
ArgMinMax::ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
BCQFullyConnected::BCQFullyConnected(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
{
}
BCQGather::BCQGather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
BatchMatMul::BatchMatMul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
{
using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType;
static const std::unordered_map<ArithmeticType, std::string> name_map{
- {ArithmeticType::ADD, std::string{"Add"}},
- {ArithmeticType::SUB, std::string{"Sub"}},
- {ArithmeticType::MUL, std::string{"Mul"}},
- {ArithmeticType::DIV, std::string{"Div"}}};
+ {ArithmeticType::ADD, std::string{"Add"}},
+ {ArithmeticType::SUB, std::string{"Sub"}},
+ {ArithmeticType::MUL, std::string{"Mul"}},
+ {ArithmeticType::DIV, std::string{"Div"}}};
return name_map.at(_param.arithmetic_type);
}
void BroadcastTo::accept(OperationVisitor &v) const { v.visit(*this); }
BroadcastTo::BroadcastTo(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
Comparison::Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
Concat::Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
Conv2D::Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
ConvertFp16ToFp32::ConvertFp16ToFp32(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
ConvertFp32ToFp16::ConvertFp32ToFp16(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
Custom::Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, std::string id, const Userdata &userdata)
- : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata)
+ : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata)
{
}
DepthToSpace::DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
DepthwiseConv2D::DepthwiseConv2D(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
Einsum::Einsum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
if (param.op_type == Type::LOGISTIC)
{
- assert(param.alpha == 0.0f && param.beta == 0.0f && "Logistic will be supported only as "
- "sigmoid function(L=1, k=1, x0=0). So, do "
- "not use alpha and beta");
+ assert(param.alpha == 0.0f && param.beta == 0.0f &&
+ "Logistic will be supported only as "
+ "sigmoid function(L=1, k=1, x0=0). So, do "
+ "not use alpha and beta");
}
else if (param.op_type == Type::RELU)
{
}
else if (param.op_type == Type::TANH)
{
- assert(param.alpha == 1.0f && param.beta == 1.0f && "f(x) = alpha * tanh(beta * x), Tanh is "
- "supported only the values of alpha and "
- "beta are 1.f");
+ assert(param.alpha == 1.0f && param.beta == 1.0f &&
+ "f(x) = alpha * tanh(beta * x), Tanh is "
+ "supported only the values of alpha and "
+ "beta are 1.f");
}
}
{
using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type;
static const std::unordered_map<Type, std::string> name_map{
- {ElementwiseActivationType::ELU, "ELU"},
- {ElementwiseActivationType::LOGISTIC, "Logistic"},
- {ElementwiseActivationType::RELU, "ReLU"},
- {ElementwiseActivationType::TANH, "Tanh"},
- {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
+ {ElementwiseActivationType::ELU, "ELU"},
+ {ElementwiseActivationType::LOGISTIC, "Logistic"},
+ {ElementwiseActivationType::RELU, "ReLU"},
+ {ElementwiseActivationType::TANH, "Tanh"},
+ {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
return name_map.at(_param.op_type);
}
ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
{
using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType;
static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{
- {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
- {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
- {ElementwiseBinaryType::MAX, std::string{"Max"}},
- {ElementwiseBinaryType::MIN, std::string{"Min"}}};
+ {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
+ {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
+ {ElementwiseBinaryType::MAX, std::string{"Max"}},
+ {ElementwiseBinaryType::MIN, std::string{"Min"}}};
return name_map.at(_param.op_type);
}
ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs,
- OperandConstraint::createExact(1u)},
- _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs,
+ OperandConstraint::createExact(1u)},
+ _param{param}
{
}
{
using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type;
static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{
- {ElementwiseUnaryType::ABS, std::string{"Abs"}},
- {ElementwiseUnaryType::CAST, std::string{"Cast"}},
- {ElementwiseUnaryType::COS, std::string{"Cos"}},
- {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
- {ElementwiseUnaryType::ERF, std::string{"Erf"}},
- {ElementwiseUnaryType::EXP, std::string{"Exp"}},
- {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
- {ElementwiseUnaryType::LOG, std::string{"Log"}},
- {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
- {ElementwiseUnaryType::NEG, std::string{"Neg"}},
- {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
- {ElementwiseUnaryType::ROUND, std::string{"Round"}},
- {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
- {ElementwiseUnaryType::SIN, std::string{"Sin"}},
- {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
- {ElementwiseUnaryType::SQUARE, std::string{"Square"}},
- {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
+ {ElementwiseUnaryType::ABS, std::string{"Abs"}},
+ {ElementwiseUnaryType::CAST, std::string{"Cast"}},
+ {ElementwiseUnaryType::COS, std::string{"Cos"}},
+ {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
+ {ElementwiseUnaryType::ERF, std::string{"Erf"}},
+ {ElementwiseUnaryType::EXP, std::string{"Exp"}},
+ {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
+ {ElementwiseUnaryType::LOG, std::string{"Log"}},
+ {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
+ {ElementwiseUnaryType::NEG, std::string{"Neg"}},
+ {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
+ {ElementwiseUnaryType::ROUND, std::string{"Round"}},
+ {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
+ {ElementwiseUnaryType::SIN, std::string{"Sin"}},
+ {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
+ {ElementwiseUnaryType::SQUARE, std::string{"Square"}},
+ {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
return name_map.at(_param.op_type);
}
EmbeddingLookup::EmbeddingLookup(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
void ExpandDims::accept(OperationVisitor &v) const { v.visit(*this); }
ExpandDims::ExpandDims(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
void Fill::accept(OperationVisitor &v) const { v.visit(*this); }
Fill::Fill(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
FullyConnected::FullyConnected(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}, _param{param}
{
}
FusedBatchNorm::FusedBatchNorm(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createAtLeast(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(5u), inputs, outputs}, _param{param}
{
}
Gather::Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
HashtableLookup::HashtableLookup(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
{
void If::accept(OperationVisitor &v) const { v.visit(*this); }
If::If(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
{
}
} // namespace operation
InstanceNorm::InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
L2Normalization::L2Normalization(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
LSTM::LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createInRange(20u, 24u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(20u, 24u), inputs, outputs}, _param{param}
{
}
LocalResponseNormalization::LocalResponseNormalization(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
LogSoftmax::LogSoftmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LowerInfo.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-LowerInfo::LowerInfo(const backend::Backend *backend, Layout layout)
- : _permute_factor{backend, layout}
-{
- // DO NOTHING
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
MatrixBandPart::MatrixBandPart(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
OneHot::OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
void PReLU::accept(OperationVisitor &v) const { v.visit(*this); }
PReLU::PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
void Pack::accept(OperationVisitor &v) const { v.visit(*this); }
Pack::Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
} // namespace operation
// PAD: 2 inputs
// PADV2: 3 inputs
Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
void Permute::accept(OperationVisitor &v) const { v.visit(*this); }
Permute::Permute(const OperandIndex &input, const OperandIndex &output, Type type)
- : Operation{OperandConstraint::createExact(1u)}, _type{type}
+ : Operation{OperandConstraint::createExact(1u)}, _type{type}
{
setInputs({input});
setOutputs({output});
Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
{
using PoolType = onert::ir::operation::Pool2D::PoolType;
static const std::unordered_map<PoolType, std::string> name_map{
- {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
- {PoolType::L2, "L2" + std::string{toString(opcode())}},
- {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
+ {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
+ {PoolType::L2, "L2" + std::string{toString(opcode())}},
+ {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
return name_map.at(_param.op_type);
}
void Pow::accept(OperationVisitor &v) const { v.visit(*this); }
Pow::Pow(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
RNN::RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
{
}
void Range::accept(OperationVisitor &v) const { v.visit(*this); }
Range::Range(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
void Rank::accept(OperationVisitor &v) const { v.visit(*this); }
Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
Reduce::Reduce(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
{
using ReduceType = onert::ir::operation::Reduce::ReduceType;
static const std::unordered_map<ReduceType, std::string> name_map{
- {ReduceType::ALL, std::string{toString(opcode())} + "All"},
- {ReduceType::ANY, std::string{toString(opcode())} + "Any"},
- {ReduceType::MAX, std::string{toString(opcode())} + "Max"},
- {ReduceType::MEAN, std::string{toString(opcode())} + "Mean"},
- {ReduceType::MIN, std::string{toString(opcode())} + "Min"},
- {ReduceType::PROD, std::string{toString(opcode())} + "Prod"},
- {ReduceType::SUM, std::string{toString(opcode())} + "SUM"}};
+ {ReduceType::ALL, std::string{toString(opcode())} + "All"},
+ {ReduceType::ANY, std::string{toString(opcode())} + "Any"},
+ {ReduceType::MAX, std::string{toString(opcode())} + "Max"},
+ {ReduceType::MEAN, std::string{toString(opcode())} + "Mean"},
+ {ReduceType::MIN, std::string{toString(opcode())} + "Min"},
+ {ReduceType::PROD, std::string{toString(opcode())} + "Prod"},
+ {ReduceType::SUM, std::string{toString(opcode())} + "SUM"}};
return name_map.at(_param.reduce_type);
// return std::string(toString(opcode())) + reduce_type_str_map.at(_param.reduce_type);
}
Reshape::Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param(param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param(param)
{
}
ResizeBilinear::ResizeBilinear(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
{
}
ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
{
}
void Reverse::accept(OperationVisitor &v) const { v.visit(*this); }
Reverse::Reverse(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
void Select::accept(OperationVisitor &v) const { v.visit(*this); }
Select::Select(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
void Shape::accept(OperationVisitor &v) const { v.visit(*this); }
Shape::Shape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
void Slice::accept(OperationVisitor &v) const { v.visit(*this); }
Slice::Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
Softmax::Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
SpaceToBatchND::SpaceToBatchND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
SpaceToDepth::SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
void Split::accept(OperationVisitor &v) const { v.visit(*this); }
Split::Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
} // namespace operation
void SplitV::accept(OperationVisitor &v) const { v.visit(*this); }
SplitV::SplitV(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
} // namespace operation
SquaredDifference::SquaredDifference(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
Squeeze::Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param)
{
}
StatelessRandomUniform::StatelessRandomUniform(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
StridedSlice::StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
void Tile::accept(OperationVisitor &v) const { v.visit(*this); }
Tile::Tile(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
TopKV2::TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
void Transpose::accept(OperationVisitor &v) const { v.visit(*this); }
Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
TransposeConv::TransposeConv(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param ¶m)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
void Unpack::accept(OperationVisitor &v) const { v.visit(*this); }
Unpack::Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
} // namespace operation
void While::accept(OperationVisitor &v) const { v.visit(*this); }
While::While(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param ¶m)
- : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
{
}
} // namespace operation
OperationIndexMap<bool> visited;
operations.iterate(
- [&](const OperationIndex &index, const Operation &) { visited[index] = false; });
+ [&](const OperationIndex &index, const Operation &) { visited[index] = false; });
OperationIndexMap<bool> on_stack = visited; // Copy from visited
std::function<void(const OperationIndex &index, const Operation &)> dfs_recursive =
- [&](const OperationIndex &index, const Operation &node) -> void {
+ [&](const OperationIndex &index, const Operation &node) -> void {
if (on_stack[index])
cyclic = true;
if (visited[index])
// EdgeConsistencyVerifier
//
-bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept
+bool EdgeChecker::verify(const Graph &graph) const noexcept
{
auto &operations = graph.operations();
uint32_t errors = 0;
bool operand_has_use = operand.getUses().contains(index);
if (!operand_has_use)
{
- VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand "
- << operand_index << " to Operation " << index
- << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand "
+ << operand_index << " to Operation " << index << std::endl;
errors += 1;
}
}
catch (const std::out_of_range &e)
{
- VERBOSE(EdgeConsistencyChecker)
- << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
- << operand_index << ", but the operand object is not present in the graph" << std::endl;
+      VERBOSE(EdgeChecker) << "[ERROR] OPERAND NOT FOUND : Operation " << index
+ << " has Operand " << operand_index
+ << ", but the operand object is not present in the graph" << std::endl;
errors += 1;
}
}
auto &operand = graph.operands().at(operand_index);
if (operand.getDef() != index)
{
- VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand"
- << operand_index << " to Operation " << index
- << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand"
+ << operand_index << " to Operation " << index << std::endl;
errors += 1;
}
}
catch (const std::out_of_range &e)
{
- VERBOSE(EdgeConsistencyChecker)
- << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
- << operand_index << ", but the operand object is not present in the graph" << std::endl;
+      VERBOSE(EdgeChecker) << "[ERROR] OPERAND NOT FOUND : Operation " << index
+ << " has Operand " << operand_index
+ << ", but the operand object is not present in the graph" << std::endl;
errors += 1;
}
}
});
- VERBOSE(EdgeConsistencyChecker) << "Total Number of errors : " << errors << std::endl;
+ VERBOSE(EdgeChecker) << "Total Number of errors : " << errors << std::endl;
return errors == 0;
}
bool verify(const Graph &graph) const noexcept override;
};
-class EdgeConsistencyChecker : public IVerifier
+class EdgeChecker : public IVerifier
{
public:
bool verify(const Graph &graph) const noexcept override;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EventWriter.h"
+
+#include <sstream>
+#include <vector>
+#include <cassert>
+#include <utility>
+
+// json type for ChromeTracingWriter
+namespace
+{
+
+std::string quote(const std::string &value)
+{
+ std::stringstream ss;
+ ss << '"' << value << '"';
+ return ss.str();
+}
+
+std::string field(const std::string &k, const std::string &v)
+{
+ std::stringstream ss;
+ ss << quote(k) << " : " << quote(v);
+ return ss.str();
+}
+
+struct Content // One Entry in Chrome Event Trace
+{
+ std::vector<std::pair<std::string, std::string>> flds;
+ std::vector<std::pair<std::string, std::string>> args;
+};
+
+std::string object(const Content &content)
+{
+ std::stringstream ss;
+
+ ss << "{ ";
+
+ ss << field(content.flds[0].first, content.flds[0].second);
+
+ for (uint32_t n = 1; n < content.flds.size(); ++n)
+ {
+ ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second);
+ }
+
+ if (content.args.size() > 0)
+ {
+ ss << ", " << quote("args") << " : { ";
+ ss << field(content.args.at(0).first, content.args.at(0).second);
+
+ for (uint32_t n = 1; n < content.args.size(); ++n)
+ {
+ ss << ", " << field(content.args.at(n).first, content.args.at(n).second);
+ }
+
+ ss << "}";
+ }
+
+ ss << " }";
+
+ return ss.str();
+}
+
+void fill(Content &content, const DurationEvent &evt, const std::string &name,
+ const std::string &tid)
+{
+ content.flds.emplace_back("name", name);
+ content.flds.emplace_back("pid", "0");
+ content.flds.emplace_back("tid", tid);
+ content.flds.emplace_back("ph", evt.ph);
+ content.flds.emplace_back("ts", evt.ts);
+ content.args = evt.args;
+}
+
+void fill(Content &content, const CounterEvent &evt)
+{
+ assert(evt.name != "");
+
+ content.flds.emplace_back("name", evt.name);
+ content.flds.emplace_back("pid", "0");
+ content.flds.emplace_back("tid", evt.tid);
+ content.flds.emplace_back("ph", evt.ph);
+ content.flds.emplace_back("ts", evt.ts);
+ content.args = evt.args;
+}
+
+std::string object(const DurationEvent &evt, const std::string &name, const std::string &tid)
+{
+ Content content;
+
+ fill(content, evt, name, tid);
+
+ return ::object(content);
+}
+
+std::string object(const CounterEvent &evt)
+{
+ Content content;
+
+ fill(content, evt);
+
+ for (auto it = evt.values.begin(); it != evt.values.end(); ++it)
+ {
+ content.args.emplace_back(it->first, it->second);
+ }
+
+ return ::object(content);
+}
+
+std::string getSessionLabel(const DurationEvent &evt)
+{
+ return "$" + std::to_string(evt.session_index) + " sess";
+}
+
+std::string getSubgLabel(const DurationEvent &evt)
+{
+ return "$" + std::to_string(evt.subg_index) + " subg";
+}
+
+std::string getOpLabel(const OpSeqDurationEvent &evt)
+{
+ return "@" + std::to_string(evt.op_index) + " " + evt.op_name;
+}
+
+std::string getLabel(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ return getOpLabel(*evt_ptr);
+ }
+ else // SubgDurationEvent
+ {
+ return getSubgLabel(evt);
+ }
+}
+
+std::string getTid(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ return getSessionLabel(*evt_ptr) + ", " + getSubgLabel(*evt_ptr) + ", " + evt_ptr->backend;
+ }
+ else // SubgDurationEvent
+ {
+ return getSessionLabel(evt) + ", " + getSubgLabel(evt);
+ }
+}
+
+} // namespace
+
+void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
+{
+ _os << "{\n";
+ _os << " " << quote("traceEvents") << ": [\n";
+
+ for (auto &recorder : recorders)
+ {
+ flushOneRecord(*recorder);
+ }
+
+ _os << " { }\n";
+ _os << " ]\n";
+ _os << "}\n";
+}
+
+void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
+{
+ for (auto &evt : recorder.duration_events())
+ {
+ const std::string name = getLabel(*evt);
+ const std::string tid = getTid(*evt);
+
+ _os << " " << object(*evt, name, tid) << ",\n";
+ }
+
+ for (auto &evt : recorder.counter_events())
+ {
+ _os << " " << object(evt) << ",\n";
+ }
+}
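+
+// Illustrative sketch of the output, assuming an OpSeqDurationEvent with ph="B",
+// ts="100", session_index=1, subg_index=0, backend="cpu", op_index=3,
+// op_name="Conv2D" and no user args:
+//
+//   getLabel(evt) -> "@3 Conv2D"
+//   getTid(evt)   -> "$1 sess, $0 subg, cpu"
+//   object(evt, getLabel(evt), getTid(evt))
+//     -> { "name" : "@3 Conv2D", "pid" : "0", "tid" : "$1 sess, $0 subg, cpu",
+//          "ph" : "B", "ts" : "100" }
+//
+// Each such object becomes one entry of the "traceEvents" array that flush() wraps.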
{
auto now = std::chrono::steady_clock::now();
return std::to_string(
- std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
+ std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
}
-class DurationEventBuilder
+class DurationEventBuilder : public EventCollector::EventVisitor
{
public:
DurationEventBuilder(const std::string &ts) : _ts{ts} {}
- DurationEvent build(const EventCollector::Event &evt_collected, const std::string &ph) const
+ std::unique_ptr<SubgDurationEvent> build(const EventCollector::SubgEvent &evt_collected,
+ const std::string &ph) const
{
- DurationEvent evt;
+ auto dur_evt = std::make_unique<SubgDurationEvent>();
- evt.name = evt_collected.label;
- evt.tid = evt_collected.backend;
- evt.ph = ph;
- evt.ts = _ts;
+    // The following will be set by a child of EventWriter:
+ // dur_evt.name, dur_evt.tid
+ dur_evt->ph = ph;
+ dur_evt->ts = _ts;
+ dur_evt->tracing_ctx = evt_collected.tracing_ctx;
- evt.args = evt_collected.userData;
+ dur_evt->session_index = evt_collected.session_index;
+ dur_evt->subg_index = evt_collected.subg_index;
- return evt;
+ dur_evt->args = evt_collected.userData;
+ {
+ dur_evt->args.emplace_back("session", std::to_string(evt_collected.session_index));
+ dur_evt->args.emplace_back("subgraph", std::to_string(evt_collected.subg_index));
+ }
+
+ return dur_evt;
+ }
+
+ std::unique_ptr<OpSeqDurationEvent> build(const EventCollector::OpSeqEvent &evt_collected,
+ const std::string &ph) const
+ {
+ auto dur_evt = std::make_unique<OpSeqDurationEvent>();
+
+    // The following will be set by a child of EventWriter:
+ // dur_evt.name, dur_evt.tid
+ dur_evt->ph = ph;
+ dur_evt->ts = _ts;
+ dur_evt->tracing_ctx = evt_collected.tracing_ctx;
+
+ dur_evt->session_index = evt_collected.session_index;
+ dur_evt->subg_index = evt_collected.subg_index;
+
+ dur_evt->backend = evt_collected.backend;
+ dur_evt->op_index = evt_collected.op_index;
+ dur_evt->op_name = evt_collected.op_name;
+
+ dur_evt->args = evt_collected.userData;
+ {
+ dur_evt->args.emplace_back("session", std::to_string(evt_collected.session_index));
+ dur_evt->args.emplace_back("subgraph", std::to_string(evt_collected.subg_index));
+ }
+
+ return dur_evt;
}
private:
} // namespace
-void EventCollector::onEvent(const Event &event)
+template <typename EventT> void EventCollector::onEvent(const EventT &event)
{
auto ts = timestamp();
+ DurationEventBuilder builder(ts);
+
switch (event.edge)
{
case Edge::BEGIN:
- _rec->emit(DurationEventBuilder(ts).build(event, "B"));
+ {
+ auto duration_evt = builder.build(event, "B");
+ _rec->emit(std::move(duration_evt));
break;
-
+ }
case Edge::END:
- _rec->emit(DurationEventBuilder(ts).build(event, "E"));
+ {
+ auto duration_evt = builder.build(event, "E");
+ _rec->emit(std::move(duration_evt));
break;
+ }
}
  // TODO: Add resource measurement (e.g. RSS)
emit_rusage(_rec, ts);
#endif
}
+
+// template instantiation
+template void EventCollector::onEvent<EventCollector::SubgEvent>(const SubgEvent &event);
+template void EventCollector::onEvent<EventCollector::OpSeqEvent>(const OpSeqEvent &event);
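+
+// Illustrative caller sketch (hypothetical; assumes a live TracingCtx *ctx and an
+// EventRecorder instance named recorder):
+//
+//   EventCollector collector{&recorder};
+//   collector.onEvent(EventCollector::SubgEvent{ctx, EventCollector::Edge::BEGIN, 0});
+//   collector.onEvent(
+//     EventCollector::OpSeqEvent{ctx, EventCollector::Edge::BEGIN, 0, "cpu", 3, "Conv2D"});
+//   collector.onEvent(
+//     EventCollector::OpSeqEvent{ctx, EventCollector::Edge::END, 0, "cpu", 3, "Conv2D"});
+//   collector.onEvent(EventCollector::SubgEvent{ctx, EventCollector::Edge::END, 0});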
#define __ONERT_UTIL_EVENT_COLLECTOR_H__
#include "util/EventRecorder.h"
+#include "util/TracingCtx.h"
#include <vector>
#include <utility>
END
};
+ struct SubgEvent;
+ struct OpEvent;
+
+ class EventVisitor
+ {
+ public:
+ virtual ~EventVisitor() = default;
+
+ virtual std::unique_ptr<DurationEvent> visit(const SubgEvent &, const std::string &) const
+ {
+ throw std::runtime_error("Please implement");
+ }
+ virtual std::unique_ptr<DurationEvent> visit(const OpEvent &, const std::string &) const
+ {
+ throw std::runtime_error("Please implement");
+ }
+ };
+
struct Event
{
+ const onert::util::TracingCtx *tracing_ctx;
+
Edge edge;
uint32_t session_index;
uint32_t subg_index;
- std::string backend;
- uint32_t op_index;
- std::string op_name;
- uint32_t op_seq_size; // if this event is for an operation sequence of multiple operations
-
- // TODO deprecate this. label can be differ by writer. So let the writer decide label.
- std::string label;
// user-defined data: pairs of (key, value)
std::vector<std::pair<std::string, std::string>> userData;
- Event(Edge a_edge, const std::string &a_backend, const std::string &a_label)
- : edge(a_edge), session_index(0), subg_index(0), backend(a_backend), op_index(0),
- op_seq_size(0), label(a_label)
+ protected:
+ Event(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index)
+ : tracing_ctx(a_tracing_ctx), edge(a_edge), session_index(tracing_ctx->getSessionId()),
+ subg_index(a_subg_index)
+ { /* empty */
+ }
+
+ virtual ~Event() = default;
+ };
+
+ struct SubgEvent : public Event
+ {
+ // constructor for subgraph start and end event
+ SubgEvent(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index)
+ : Event(a_tracing_ctx, a_edge, a_subg_index)
{ /* empty */
}
};
+ // TODO Rename this to OperationEvent
+ struct OpSeqEvent : public Event
+ {
+ std::string backend;
+ uint32_t op_index;
+ std::string op_name;
+
+ OpSeqEvent(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index,
+ const std::string a_backend, uint32_t a_op_index, const std::string a_op_name)
+ : Event(a_tracing_ctx, a_edge, a_subg_index)
+ {
+ backend.assign(a_backend);
+ op_index = a_op_index;
+ op_name.assign(a_op_name);
+ }
+ };
+
public:
EventCollector(EventRecorder *rec) : _rec{rec}
{
}
public:
- void onEvent(const Event &event);
+ template <typename EventT> void onEvent(const EventT &event);
protected:
EventRecorder *_rec;
#include "util/EventRecorder.h"
-void EventRecorder::emit(const DurationEvent &evt)
+void EventRecorder::emit(std::unique_ptr<DurationEvent> &&evt)
{
std::lock_guard<std::mutex> lock{_mu};
- _duration_events.push_back(evt);
+ _duration_events.push_back(std::move(evt));
}
void EventRecorder::emit(const CounterEvent &evt)
#ifndef __ONERT_UTIL_EVENT_RECORDER_H__
#define __ONERT_UTIL_EVENT_RECORDER_H__
+#include "util/TracingCtx.h"
+
#include <map>
#include <memory>
#include <mutex>
#include <vector>
+// refer to https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/edit#
struct Event
{
- std::string name;
- std::string tid;
- std::string ph; /* REQUIRED */
- std::string ts; /* REQUIRED */
+ const onert::util::TracingCtx *tracing_ctx;
+
+ std::string ph; // Event type.
+ std::string ts; // tracing clock of timestamp of this event
std::vector<std::pair<std::string, std::string>> args; // user-defined data: pairs of (key, value)
+
+ virtual ~Event() = default;
};
struct DurationEvent : public Event
{
- // TO BE FILLED
+ uint32_t session_index = 0;
+ uint32_t subg_index = 0;
+
+protected:
+ DurationEvent() = default;
+};
+
+struct SubgDurationEvent : public DurationEvent
+{ /* same with DurationEvent */
+};
+
+// TODO Rename it to OperationDurationEvent
+struct OpSeqDurationEvent : public DurationEvent
+{
+ // Note: DurationEvent's name and tid will be set by EventWriter
+ std::string backend;
+ uint32_t op_index;
+ std::string op_name;
};
struct CounterEvent : public Event
{
+ std::string name; // name of event
+ std::string tid; // thread ID
std::map<std::string, std::string> values;
};
EventRecorder() = default;
public:
- void emit(const DurationEvent &evt);
+ void emit(std::unique_ptr<DurationEvent> &&evt);
void emit(const CounterEvent &evt);
public:
- bool empty() { return _duration_events.empty() && _counter_events.empty(); }
- const std::vector<DurationEvent> &duration_events() const { return _duration_events; }
+ const std::vector<std::unique_ptr<DurationEvent>> &duration_events() const
+ {
+ return _duration_events;
+ }
const std::vector<CounterEvent> &counter_events() const { return _counter_events; }
private:
std::mutex _mu;
- std::vector<DurationEvent> _duration_events;
+ std::vector<std::unique_ptr<DurationEvent>> _duration_events;
std::vector<CounterEvent> _counter_events;
};
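+
+// Illustrative usage sketch (hypothetical; OpSeqDurationEvent is declared above):
+//   EventRecorder recorder;
+//   auto evt = std::make_unique<OpSeqDurationEvent>();
+//   evt->ph = "B";
+//   evt->ts = "100";
+//   recorder.emit(std::move(evt)); // the recorder takes ownership of the event
+//   // a writer later walks recorder.duration_events() to serialize each entry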
#include "util/EventWriter.h"
-#include <sstream>
-#include <vector>
-#include <unordered_map>
-#include <json/json.h>
-#include <assert.h>
-#include <utility>
-#include <map>
-#include <set>
-#include <stdint.h>
-#include <fstream>
-
-// json type for Chrome Event Trace
-namespace
-{
-
-std::string quote(const std::string &value)
-{
- std::stringstream ss;
- ss << '"' << value << '"';
- return ss.str();
-}
-
-std::string field(const std::string &k, const std::string &v)
-{
- std::stringstream ss;
- ss << quote(k) << " : " << quote(v);
- return ss.str();
-}
-
-struct Content // One Entry in Chrome Event Trace
-{
- std::vector<std::pair<std::string, std::string>> flds;
- std::vector<std::pair<std::string, std::string>> args;
-};
-
-std::string object(const Content &content)
-{
- std::stringstream ss;
-
- ss << "{ ";
-
- ss << field(content.flds[0].first, content.flds[0].second);
-
- for (uint32_t n = 1; n < content.flds.size(); ++n)
- {
- ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second);
- }
-
- if (content.args.size() > 0)
- {
- ss << ", " << quote("args") << " : { ";
- ss << field(content.args.at(0).first, content.args.at(0).second);
-
- for (uint32_t n = 1; n < content.args.size(); ++n)
- {
- ss << ", " << field(content.args.at(n).first, content.args.at(n).second);
- }
-
- ss << "}";
- }
-
- ss << " }";
-
- return ss.str();
-}
-
-void fill(Content &content, const Event &evt)
-{
- content.flds.emplace_back("name", evt.name);
- content.flds.emplace_back("pid", "0");
- content.flds.emplace_back("tid", evt.tid);
- content.flds.emplace_back("ph", evt.ph);
- content.flds.emplace_back("ts", evt.ts);
- content.args = evt.args;
-}
-
-std::string object(const DurationEvent &evt)
-{
- Content content;
-
- fill(content, evt);
-
- return ::object(content);
-}
-
-std::string object(const CounterEvent &evt)
-{
- Content content;
-
- fill(content, evt);
-
- for (auto it = evt.values.begin(); it != evt.values.end(); ++it)
- {
- content.args.emplace_back(it->first, it->second);
- }
-
- return ::object(content);
-}
-
-} // namespace
-
-// md table type
-namespace
-{
-
-void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
-{
- os << "| ";
- for (auto &key : list)
- {
- os << key << " | ";
- }
- os << "\n";
-}
-
-struct MDContent
-{
- std::string name;
- uint64_t begin_ts;
- uint64_t end_ts;
- uint32_t min_rss;
- uint32_t max_rss;
- uint32_t min_page_reclaims;
- uint32_t max_page_reclaims;
-
- MDContent()
- : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
- max_page_reclaims(0)
- {
- // DO NOTHING
- }
-
- virtual ~MDContent() = default;
-
- void updateRss(uint32_t rss)
- {
- if (min_rss == UINT32_MAX)
- min_rss = rss;
- if (max_rss == 0)
- max_rss = rss;
-
- if (min_rss > rss)
- min_rss = rss;
- else if (max_rss < rss)
- max_rss = rss;
- }
-
- void updateMinflt(uint32_t minflt)
- {
- if (min_page_reclaims == UINT32_MAX)
- min_page_reclaims = minflt;
- if (max_page_reclaims == 0)
- max_page_reclaims = minflt;
-
- if (min_page_reclaims > minflt)
- min_page_reclaims = minflt;
- else if (max_page_reclaims < minflt)
- max_page_reclaims = minflt;
- }
-
- virtual void write(std::ostream &os) const = 0;
-};
-
-struct OpSeq : public MDContent
-{
- std::string backend;
- uint64_t graph_latency;
-
- struct OpSeqCmp
- {
- bool operator()(const OpSeq &lhs, const OpSeq &rhs) const
- {
- return lhs.begin_ts < rhs.begin_ts;
- }
- bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
- bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
- };
-
- void write(std::ostream &os) const override
- {
- uint64_t opseq_latency = end_ts - begin_ts;
- double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0;
- writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per),
- std::to_string(min_rss), std::to_string(max_rss),
- std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
- }
-};
-
-struct Graph : public MDContent
-{
- std::set<OpSeq, OpSeq::OpSeqCmp> opseqs;
-
- void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq)
- {
- uint64_t graph_latency = end_ts - begin_ts;
- for (auto it : name_to_opseq)
- {
- auto opseq = it.second;
- opseq.graph_latency = graph_latency;
-
- opseqs.insert(opseq);
-
- updateRss(opseq.min_rss);
- updateRss(opseq.max_rss);
- updateMinflt(opseq.min_page_reclaims);
- updateMinflt(opseq.max_page_reclaims);
- }
- }
-
- void write(std::ostream &os) const override
- {
- static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
- "page_reclaims_min", "page_reclaims_max"};
-
- static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
- "-----------------", "-----------------"};
-
- // Graph's Header
- writeMDTableRow(os, graph_headers);
- writeMDTableRow(os, graph_headers_line);
-
- // Graph's contents
- writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
- std::to_string(max_rss), std::to_string(min_page_reclaims),
- std::to_string(max_page_reclaims)});
-
- os << "\n";
-
- static std::vector<std::string> opseq_headers{
- "OpSeq name", "backend", "latency(us)", "latency(%)",
- "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
-
- static std::vector<std::string> opseq_headers_line{
- "----------", "-------", "-----------", "-----------",
- "-------", "-------", "-----------------", "-----------------"};
-
- os << "## OpSequences \n";
-
- // OpSeq's Header
- writeMDTableRow(os, opseq_headers);
- writeMDTableRow(os, opseq_headers_line);
-
- // OpSeq's contents
- for (auto opseq : opseqs)
- {
- opseq.write(os);
- }
-
- os << "\n";
- }
-};
-
-struct MDTableBuilder
-{
- MDTableBuilder(const std::vector<DurationEvent> &duration_events,
- const std::vector<CounterEvent> &counter_events)
- : _duration_events(duration_events), _counter_events(counter_events)
- {
-// when ready with low overhead in release build
-#ifdef DEBUG
- for (const auto &evt : _counter_events)
- {
- uint64_t ts = std::stoull(evt.ts);
- auto &name = evt.name;
- assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
- assert(evt.values.size() == 1);
- auto &val = evt.values.begin()->second;
- if (_ts_to_values.find(ts) == _ts_to_values.end())
- {
- std::pair<uint32_t, uint32_t> values;
- if (name.compare("maxrss") == 0)
- values.first = std::stoul(val);
- else
- values.second = std::stoul(val);
- _ts_to_values.insert({ts, values});
- }
- else
- {
- auto &values = _ts_to_values.at(ts);
- if (name.compare("maxrss") == 0)
- values.first = std::stoul(val);
- else
- values.second = std::stoul(val);
- }
- }
-#endif
- }
-
- MDTableBuilder &build()
- {
- for (auto &it : divideGraph())
- {
- size_t begin_idx = it.first;
- size_t end_idx = it.second;
- std::map<std::string, OpSeq> name_to_opseq;
- for (size_t i = begin_idx + 1; i < end_idx; ++i)
- {
- const auto &evt = _duration_events[i];
- assert(evt.name.compare("Graph") != 0);
- assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0);
- if (evt.ph.compare("B") == 0)
- {
- assert(name_to_opseq.find(evt.name) == name_to_opseq.end());
- name_to_opseq.insert({evt.name, makeOpSeq(evt)});
- }
- else
- {
- assert(name_to_opseq.find(evt.name) != name_to_opseq.end());
- auto &opseq = name_to_opseq.at(evt.name);
- updateOpSeq(opseq, evt);
- }
- }
-
- _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq));
- }
-
- return *this;
- }
-
- std::vector<std::pair<size_t, size_t>> divideGraph()
- {
- std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
- for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
- {
- const auto &evt = _duration_events.at(i);
- if (evt.name.compare("Graph") == 0)
- {
- if (evt.ph.compare("B") == 0)
- begin_idx = i;
- else
- graph_idx_list.emplace_back(begin_idx, i);
- }
- }
- return graph_idx_list;
- }
-
- OpSeq makeOpSeq(const DurationEvent &evt)
- {
- OpSeq opseq;
- opseq.name = evt.name;
- opseq.begin_ts = std::stoull(evt.ts);
- opseq.backend = evt.tid;
-#ifdef DEBUG
- opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first);
- opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second);
-#else
- opseq.updateRss(0);
- opseq.updateMinflt(0);
-#endif
- return opseq;
- }
-
- void updateOpSeq(OpSeq &opseq, const DurationEvent &evt)
- {
- opseq.end_ts = std::stoull(evt.ts);
-#ifdef DEBUG
- opseq.updateRss(_ts_to_values.at(opseq.end_ts).first);
- opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second);
-#else
- opseq.updateRss(0);
- opseq.updateMinflt(0);
-#endif
- }
-
- Graph makeGraph(size_t begin_idx, size_t end_idx,
- const std::map<std::string, OpSeq> &name_to_opseq)
- {
- Graph graph;
- graph.name = "Graph";
- graph.begin_ts = std::stoull(_duration_events[begin_idx].ts);
- graph.end_ts = std::stoull(_duration_events[end_idx].ts);
- graph.setOpSeqs(name_to_opseq);
-#ifdef DEBUG
- graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
- graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
- graph.updateRss(_ts_to_values.at(graph.end_ts).first);
- graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
-#else
- graph.updateRss(0);
- graph.updateMinflt(0);
-#endif
- return graph;
- }
-
- void write(std::ostream &os)
- {
- // Write contents
- for (size_t i = 0; i < _graphs.size(); ++i)
- {
- os << "# Graph " << i << "\n";
- _graphs.at(i).write(os);
- }
- }
-
- const std::vector<DurationEvent> &_duration_events;
- const std::vector<CounterEvent> &_counter_events;
- // timestamp to std::pair<maxrss, minflt>
- std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
- std::vector<Graph> _graphs;
-};
-
-} // namespace
-
-void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
-{
- Json::Value root;
- auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
-
- struct Stat
- {
- uint64_t sum = 0;
- uint64_t count = 0;
- uint64_t max = 0;
- uint64_t min = std::numeric_limits<uint64_t>::max();
-
- void accumulate(uint64_t val)
- {
- sum += val;
- count++;
- max = std::max(max, val);
- min = std::min(min, val);
- }
- };
-
- // Memory
- {
- std::unordered_map<std::string, Stat> mem_stats;
- for (auto &recorder : recorders)
- {
- for (auto &evt : recorder->counter_events())
- {
- auto &mem_stat = mem_stats[evt.name];
- uint64_t val = std::stoull(evt.values.at("value"));
- mem_stat.accumulate(val);
- }
- }
-
- auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
- for (auto &kv : mem_stats)
- {
- auto &key = kv.first;
- auto &val = kv.second;
- mem[key]["Avg_Size"] = val.sum / val.count;
- mem[key]["Max_Size"] = val.max;
- mem[key]["Min_Size"] = val.min;
- mem[key]["Runtime"] = "NA";
- }
- }
-
- // Operation Execution Time
- {
- // NOTE This assumes _duration_events is sorted by "ts" ascending
-
- // 2D keys : stats[tid][name]
- std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
- std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
- for (auto &recorder : recorders)
- {
- for (auto &evt : recorder->duration_events())
- {
- auto &stat = stats[evt.tid][evt.name];
- auto &begin_ts = begin_timestamps[evt.tid][evt.name];
- uint64_t timestamp = std::stoull(evt.ts);
- if (evt.ph == "B")
- {
- if (begin_ts != 0)
- throw std::runtime_error{"Invalid Data"};
- begin_ts = timestamp;
- }
- else if (evt.ph == "E")
- {
- if (begin_ts == 0 || timestamp < begin_ts)
- throw std::runtime_error{"Invalid Data"};
- stat.accumulate(timestamp - begin_ts);
- begin_ts = 0;
- }
- else
- throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""};
- }
- }
-
- for (auto &kv : begin_timestamps)
- for (auto &kv2 : kv.second)
- if (kv2.second != 0)
- throw std::runtime_error{"Invalid Data - B and E pair does not match."};
-
- for (auto &kv : stats)
- {
- auto &tid = kv.first;
- auto &map = kv.second;
- auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
- for (auto &kv : map)
- {
- auto &name = kv.first;
- auto &val = kv.second;
- json_tid[name]["Avg_Time"] = val.sum / val.count;
- json_tid[name]["Max_Time"] = val.max;
- json_tid[name]["Min_Time"] = val.min;
- json_tid[name]["Runtime"] = tid;
- }
- }
- }
-
- _os << root;
-}
-
-void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
-{
- _os << "{\n";
- _os << " " << quote("traceEvents") << ": [\n";
-
- for (auto &recorder : recorders)
- {
- flushOneRecord(*recorder);
- }
-
- _os << " { }\n";
- _os << " ]\n";
- _os << "}\n";
-}
-
-void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
-{
- for (auto &evt : recorder.duration_events())
- {
- _os << " " << object(evt) << ",\n";
- }
-
- for (auto &evt : recorder.counter_events())
- {
- _os << " " << object(evt) << ",\n";
- }
-}
-
-void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
-{
- for (auto &recorder : records)
- {
- MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
- }
-}
+#include <cassert>
// initialization
std::mutex EventWriter::_mutex;
{
public:
EventFormatWriter(const std::string &filepath) : _os{filepath, std::ofstream::out} {}
- virtual ~EventFormatWriter() { /* empty */}
+ virtual ~EventFormatWriter()
+ { /* empty */
+ }
virtual void flush(const std::vector<std::unique_ptr<EventRecorder>> &) = 0;
class SNPEWriter : public EventFormatWriter
{
public:
- SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
+ SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~SNPEWriter() {}
+
void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
};
class ChromeTracingWriter : public EventFormatWriter
{
public:
- ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
+ ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~ChromeTracingWriter() {}
+
void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
private:
class MDTableWriter : public EventFormatWriter
{
public:
- MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
- void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+ MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~MDTableWriter() {}
-private:
- void flushOneRecord(const EventRecorder &);
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
};
+#include <mutex>
+
class EventWriter
{
public:
_actual_writers[WriteFormat::SNPE_BENCHMARK] = std::make_unique<SNPEWriter>(snpe_log_name);
_actual_writers[WriteFormat::CHROME_TRACING] =
- std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name);
+ std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name);
_actual_writers[WriteFormat::MD_TABLE] = std::make_unique<MDTableWriter>(md_table_log_name);
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EventWriter.h"
+
+#include <sstream>
+#include <vector>
+#include <unordered_map>
+#include <cassert>
+#include <utility>
+#include <map>
+#include <set>
+#include <stdint.h>
+
+// md table type
+namespace
+{
+
+void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
+{
+ os << "| ";
+ for (auto &key : list)
+ {
+ os << key << " | ";
+ }
+ os << "\n";
+}
+
+struct MDContent
+{
+ std::string name;
+ uint64_t begin_ts;
+ uint64_t end_ts;
+ uint32_t min_rss;
+ uint32_t max_rss;
+ uint32_t min_page_reclaims;
+ uint32_t max_page_reclaims;
+
+ MDContent()
+ : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
+ max_page_reclaims(0)
+ {
+ // DO NOTHING
+ }
+
+ virtual ~MDContent() = default;
+
+ void updateRss(uint32_t rss)
+ {
+ if (min_rss == UINT32_MAX)
+ min_rss = rss;
+ if (max_rss == 0)
+ max_rss = rss;
+
+ if (min_rss > rss)
+ min_rss = rss;
+ else if (max_rss < rss)
+ max_rss = rss;
+ }
+
+ void updateMinflt(uint32_t minflt)
+ {
+ if (min_page_reclaims == UINT32_MAX)
+ min_page_reclaims = minflt;
+ if (max_page_reclaims == 0)
+ max_page_reclaims = minflt;
+
+ if (min_page_reclaims > minflt)
+ min_page_reclaims = minflt;
+ else if (max_page_reclaims < minflt)
+ max_page_reclaims = minflt;
+ }
+
+ virtual void write(std::ostream &os) const = 0;
+};
+
+struct Operation : public MDContent
+{
+ std::string backend;
+ uint64_t graph_latency;
+
+ struct OperationCmp
+ {
+ bool operator()(const Operation &lhs, const Operation &rhs) const
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(const Operation &lhs, const Operation &rhs)
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(Operation &lhs, Operation &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ };
+
+ void write(std::ostream &os) const override
+ {
+ uint64_t op_latency = end_ts - begin_ts;
+ double op_per = static_cast<double>(op_latency) / graph_latency * 100.0;
+ writeMDTableRow(os, {name, backend, std::to_string(op_latency), std::to_string(op_per),
+ std::to_string(min_rss), std::to_string(max_rss),
+ std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
+ }
+};
+
+struct Graph : public MDContent
+{
+ std::set<Operation, Operation::OperationCmp> ops;
+ std::string session_index;
+ std::string subgraph_index;
+
+ void setOperations(const std::map<std::string, Operation> &name_to_op)
+ {
+ uint64_t graph_latency = end_ts - begin_ts;
+ for (auto it : name_to_op)
+ {
+ auto op = it.second;
+ op.graph_latency = graph_latency;
+
+ ops.insert(op);
+
+ updateRss(op.min_rss);
+ updateRss(op.max_rss);
+ updateMinflt(op.min_page_reclaims);
+ updateMinflt(op.max_page_reclaims);
+ }
+ }
+
+ void write(std::ostream &os) const override
+ {
+ static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
+ "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
+ "-----------------", "-----------------"};
+
+ // Graph's Header
+ writeMDTableRow(os, graph_headers);
+ writeMDTableRow(os, graph_headers_line);
+
+ // Graph's contents
+ writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
+ std::to_string(max_rss), std::to_string(min_page_reclaims),
+ std::to_string(max_page_reclaims)});
+
+ os << "\n";
+
+ static std::vector<std::string> op_headers{
+ "Op name", "backend", "latency(us)", "latency(%)",
+ "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> op_headers_line{
+ "-------", "-------", "-----------", "-----------",
+ "-------", "-------", "-----------------", "-----------------"};
+
+ os << "## Op \n";
+
+ // Operation's Header
+ writeMDTableRow(os, op_headers);
+ writeMDTableRow(os, op_headers_line);
+
+ // Operation's contents
+ for (auto op : ops)
+ {
+ op.write(os);
+ }
+
+ os << "\n";
+ }
+};
+
+std::string getLabel(const OpSeqDurationEvent &evt)
+{
+ std::string subg_label("$" + std::to_string(evt.subg_index) + " subgraph");
+ std::string op_label("@" + std::to_string(evt.op_index) + " " + evt.op_name);
+
+ return subg_label + " " + op_label;
+}
+
+struct MDTableBuilder
+{
+ MDTableBuilder(const std::vector<std::unique_ptr<DurationEvent>> &duration_events,
+ const std::vector<CounterEvent> &counter_events)
+ : _duration_events(duration_events), _counter_events(counter_events)
+ {
+// when ready with low overhead in release build
+#ifdef DEBUG
+ for (const auto &evt : _counter_events)
+ {
+ uint64_t ts = std::stoull(evt.ts);
+ auto &name = evt.name;
+ assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
+ assert(evt.values.size() == 1);
+ auto &val = evt.values.begin()->second;
+ if (_ts_to_values.find(ts) == _ts_to_values.end())
+ {
+ std::pair<uint32_t, uint32_t> values;
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ _ts_to_values.insert({ts, values});
+ }
+ else
+ {
+ auto &values = _ts_to_values.at(ts);
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ }
+ }
+#endif
+ }
+
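+  // Pairs each operation's "B"/"E" duration events within every subgraph run found by
+  // divideGraph() and aggregates the results into _graphs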
+ MDTableBuilder &build()
+ {
+ for (auto &it : divideGraph())
+ {
+ size_t begin_idx = it.first;
+ size_t end_idx = it.second;
+ std::map<std::string, Operation> name_to_op;
+ for (size_t i = begin_idx + 1; i < end_idx; ++i)
+ {
+ const auto *evt = dynamic_cast<const OpSeqDurationEvent *>(_duration_events[i].get());
+ if (evt == nullptr)
+ continue;
+
+ const std::string evt_name = getLabel(*evt);
+ assert(evt->ph.compare("B") == 0 || evt->ph.compare("E") == 0);
+ if (evt->ph.compare("B") == 0)
+ {
+ assert(name_to_op.find(evt_name) == name_to_op.end());
+ name_to_op.insert({evt_name, makeOperation(*evt)});
+ }
+ else
+ {
+ assert(name_to_op.find(evt_name) != name_to_op.end());
+ auto &op = name_to_op.at(evt_name);
+ updateOperation(op, *evt);
+ }
+ }
+
+ _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_op));
+ }
+
+ return *this;
+ }
+
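+  // Returns (begin_idx, end_idx) pairs of subgraph-level "B"/"E" duration events; each pair
+  // delimits one subgraph run in _duration_events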
+ std::vector<std::pair<size_t, size_t>> divideGraph()
+ {
+ std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
+ for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
+ {
+ const auto subg_evt = dynamic_cast<const SubgDurationEvent *>(_duration_events.at(i).get());
+ if (subg_evt == nullptr)
+ continue;
+
+ if (subg_evt->ph.compare("B") == 0)
+ begin_idx = i;
+ else
+ graph_idx_list.emplace_back(begin_idx, i);
+ }
+ return graph_idx_list;
+ }
+
+ Operation makeOperation(const OpSeqDurationEvent &evt)
+ {
+ Operation op;
+ const std::string &evt_name = getLabel(evt);
+ op.name = evt_name;
+ op.begin_ts = std::stoull(evt.ts);
+ op.backend = evt.backend;
+#ifdef DEBUG
+ op.updateRss(_ts_to_values.at(op.begin_ts).first);
+ op.updateMinflt(_ts_to_values.at(op.begin_ts).second);
+#else
+ op.updateRss(0);
+ op.updateMinflt(0);
+#endif
+ return op;
+ }
+
+ void updateOperation(Operation &op, const DurationEvent &evt)
+ {
+ op.end_ts = std::stoull(evt.ts);
+#ifdef DEBUG
+ op.updateRss(_ts_to_values.at(op.end_ts).first);
+ op.updateMinflt(_ts_to_values.at(op.end_ts).second);
+#else
+ op.updateRss(0);
+ op.updateMinflt(0);
+#endif
+ }
+
+ Graph makeGraph(size_t begin_idx, size_t end_idx,
+ const std::map<std::string, Operation> &name_to_op)
+ {
+ Graph graph;
+ graph.name = "Subgraph";
+ graph.begin_ts = std::stoull(_duration_events[begin_idx]->ts);
+ graph.end_ts = std::stoull(_duration_events[end_idx]->ts);
+ graph.setOperations(name_to_op);
+
+ for (auto &arg : _duration_events[end_idx]->args)
+ {
+ if (arg.first == "session")
+ graph.session_index = arg.second;
+ if (arg.first == "subgraph")
+ graph.subgraph_index = arg.second;
+ }
+
+#ifdef DEBUG
+ graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
+ graph.updateRss(_ts_to_values.at(graph.end_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
+#else
+ graph.updateRss(0);
+ graph.updateMinflt(0);
+#endif
+ return graph;
+ }
+
+ void write(std::ostream &os)
+ {
+ // Write contents
+ for (size_t i = 0; i < _graphs.size(); ++i)
+ {
+ auto &graph = _graphs.at(i);
+ os << "# Session: " << graph.session_index << ", Subgraph: " << graph.subgraph_index
+ << ", Running count: " << i << "\n";
+ _graphs.at(i).write(os);
+ }
+ }
+
+ const std::vector<std::unique_ptr<DurationEvent>> &_duration_events;
+ const std::vector<CounterEvent> &_counter_events;
+
+ // timestamp to std::pair<maxrss, minflt>
+ std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
+ std::vector<Graph> _graphs;
+};
+
+} // namespace
+
+void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
+{
+ for (auto &recorder : records)
+ {
+ MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/EventWriter.h"
+
+#include <unordered_map>
+#include <json/json.h>
+#include <cassert>
+#include <utility>
+
+/**
+ * @brief Version of the SNPE format
+ * In version 1,
+ * - There is no "version" field in the Json
+ * - Only one subgraph is supported
+ * - Operation names have the form "$3 ADD"
+ *
+ * In version 2,
+ * - "version" : "2" was added to the Json
+ * - Multiple sessions and multiple subgraphs are supported
+ * - When there is only one session, operation names have the form "$2 subgraph $3 ADD",
+ *   meaning an ADD op whose operation index is 3, in a subgraph whose index is 2
+ * - When there are two or more sessions, operation names have the form
+ *   "$1 session $2 subgraph $3 ADD", meaning an ADD op whose operation index is 3,
+ *   in a subgraph whose index is 2, run in the 1st session
+ */
+#define SNPE_JSON_SCHEMA_VERSION "2"
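+
+// An illustrative sketch (values are made up) of the version-2 output that SNPEWriter::flush()
+// below produces; field names follow the code in this file:
+//
+// {
+//   "version" : "2",
+//   "Execution_Data" : {
+//     "memory" : {
+//       "maxrss" : {
+//         "Avg_Size" : 1024, "Max_Size" : 2048, "Min_Size" : 512, "Runtime" : "NA"
+//       }
+//     },
+//     "cpu" : {
+//       "$0 subgraph $3 ADD" : {
+//         "Avg_Time" : 100, "Max_Time" : 120, "Min_Time" : 90, "Runtime" : "cpu"
+//       }
+//     }
+//   }
+// }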
+
+namespace
+{
+
+std::string getLabel(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ std::string subg_label("$" + std::to_string(evt_ptr->subg_index) + " subgraph");
+ std::string op_label("$" + std::to_string(evt_ptr->op_index) + " " + evt_ptr->op_name);
+
+    // Note: At this moment, there is only one thread running for EventWriter
+ if (evt_ptr->tracing_ctx->hasMultipleSessions())
+ {
+ std::string session_label("$" + std::to_string(evt_ptr->session_index) + " session");
+ return session_label + " " + subg_label + " " + op_label;
+ }
+ else
+ {
+ // When there is only one session, do not include session info
+ // Refer to https://github.sec.samsung.net/STAR/nnfw/issues/11436#issuecomment-930332
+ return subg_label + " " + op_label;
+ }
+ }
+ else // SubgEvent
+ return "Graph";
+}
+
+std::string getBackend(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ return evt_ptr->backend;
+  else // SubgEvent
+ return "runtime";
+}
+
+} // namespace
+
+void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
+{
+ struct Stat
+ {
+ uint64_t sum = 0;
+ uint64_t count = 0;
+ uint64_t max = 0;
+ uint64_t min = std::numeric_limits<uint64_t>::max();
+
+ void accumulate(uint64_t val)
+ {
+ sum += val;
+ count++;
+ max = std::max(max, val);
+ min = std::min(min, val);
+ }
+ };
+
+ Json::Value root;
+ root["version"] = SNPE_JSON_SCHEMA_VERSION;
+
+ auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
+
+ // Memory
+ {
+ std::unordered_map<std::string, Stat> mem_stats;
+ for (auto &recorder : recorders)
+ {
+ for (auto &evt : recorder->counter_events())
+ {
+ auto &mem_stat = mem_stats[evt.name];
+ uint64_t val = std::stoull(evt.values.at("value"));
+ mem_stat.accumulate(val);
+ }
+ }
+
+ auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
+ for (auto &kv : mem_stats)
+ {
+ auto &key = kv.first;
+ auto &val = kv.second;
+ mem[key]["Avg_Size"] = val.sum / val.count;
+ mem[key]["Max_Size"] = val.max;
+ mem[key]["Min_Size"] = val.min;
+ mem[key]["Runtime"] = "NA";
+ }
+ }
+
+ // Operation Execution Time
+ {
+ // NOTE This assumes _duration_events is sorted by "ts" ascending
+
+ // 2D keys : stats[tid][name]
+ std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
+ std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
+ for (auto &recorder : recorders)
+ {
+ for (auto &evt : recorder->duration_events())
+ {
+ std::string evt_name = getLabel(*evt);
+ std::string evt_tid = getBackend(*evt);
+
+ auto &stat = stats[evt_tid][evt_name];
+ auto &begin_ts = begin_timestamps[evt_tid][evt_name];
+ uint64_t timestamp = std::stoull(evt->ts);
+ if (evt->ph == "B")
+ {
+ if (begin_ts != 0)
+ throw std::runtime_error{"Invalid Data"};
+ begin_ts = timestamp;
+ }
+ else if (evt->ph == "E")
+ {
+ if (begin_ts == 0 || timestamp < begin_ts)
+ throw std::runtime_error{"Invalid Data"};
+ stat.accumulate(timestamp - begin_ts);
+ begin_ts = 0;
+ }
+ else
+ throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt->ph + "\""};
+ }
+ }
+
+ for (auto &kv : begin_timestamps)
+ for (auto &kv2 : kv.second)
+ if (kv2.second != 0)
+ throw std::runtime_error{"Invalid Data - B and E pair does not match."};
+
+ for (auto &kv : stats)
+ {
+ auto &tid = kv.first;
+ auto &map = kv.second;
+ auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
+      for (auto &kv2 : map)
+      {
+        auto &name = kv2.first;
+        auto &val = kv2.second;
+ json_tid[name]["Avg_Time"] = val.sum / val.count;
+ json_tid[name]["Max_Time"] = val.max;
+ json_tid[name]["Min_Time"] = val.min;
+ json_tid[name]["Runtime"] = tid;
+ }
+ }
+ }
+
+ _os << root;
+}
break;
case ir::PaddingType::EXPLICIT:
out_h =
- (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
+ (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
out_w =
- (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal +
- 1;
+ (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal + 1;
break;
default:
assert(false);
ir::Shape out_shape(static_cast<int>(1));
out_shape.dim(0) =
- (std::is_integral<T>::value
- ? ((std::abs(start_val - limit_val) + std::abs(delta_val) - 1) / std::abs(delta_val))
- : std::ceil(std::abs((start_val - limit_val) / delta_val)));
+ (std::is_integral<T>::value
+ ? ((std::abs(start_val - limit_val) + std::abs(delta_val) - 1) / std::abs(delta_val))
+ : std::ceil(std::abs((start_val - limit_val) / delta_val)));
return out_shape;
}
ir::Shape true_shape = input_true_shape;
ir::Shape false_shape = input_false_shape;
int most_rank =
- (cond_shape.rank() >= true_shape.rank()) && (cond_shape.rank() >= false_shape.rank())
- ? cond_shape.rank()
- : (false_shape.rank() >= true_shape.rank() ? false_shape.rank() : true_shape.rank());
+ (cond_shape.rank() >= true_shape.rank()) && (cond_shape.rank() >= false_shape.rank())
+ ? cond_shape.rank()
+ : (false_shape.rank() >= true_shape.rank() ? false_shape.rank() : true_shape.rank());
ir::Shape calculate_shape(most_rank);
for (int i = 0; i < most_rank; ++i)
{
calculate_shape.dim(i) =
- (cond_shape.dim(i) >= true_shape.dim(i)) && (cond_shape.dim(i) >= false_shape.dim(i))
- ? cond_shape.dim(i)
- : (false_shape.dim(i) >= true_shape.dim(i) ? false_shape.dim(i) : true_shape.dim(i));
+ (cond_shape.dim(i) >= true_shape.dim(i)) && (cond_shape.dim(i) >= false_shape.dim(i))
+ ? cond_shape.dim(i)
+ : (false_shape.dim(i) >= true_shape.dim(i) ? false_shape.dim(i) : true_shape.dim(i));
if ((cond_shape.dim(i) != calculate_shape.dim(i) && cond_shape.dim(i) != 1) ||
(true_shape.dim(i) != calculate_shape.dim(i) && true_shape.dim(i) != 1) ||
return new_shape;
}
-ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf,
- const int32_t *sizes_buf)
+template <typename T>
+ir::Shape inferSliceShape(const ir::Shape &input_shape, const T *begins_buf, const T *sizes_buf)
{
const uint32_t rank = input_shape.rank();
ir::Shape out_shape(rank);
}
else
{
- if (input_dim < begin + size)
+ if (input_dim < static_cast<int32_t>(begin + size))
throw std::runtime_error("shape inference Slice: Invalid begin and size.");
}
- out_shape.dim(idx) = size;
+ out_shape.dim(idx) = static_cast<int32_t>(size);
}
return out_shape;
}
+// Explicit template instantiations for int32_t and int64_t begin/size buffers
+template ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf,
+ const int32_t *sizes_buf);
+template ir::Shape inferSliceShape(const ir::Shape &input_shape, const int64_t *begins_buf,
+ const int64_t *sizes_buf);
ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape &block_shape_shape,
const ir::Shape &padding_shape, const int32_t *block_shape_buf,
for (int dim = 0; dim < kSpatialDimensionNum; ++dim)
{
int final_dim_size =
- (input_shape.dim(dim + 1) + padding_buf[dim * 2] + padding_buf[dim * 2 + 1]);
+ (input_shape.dim(dim + 1) + padding_buf[dim * 2] + padding_buf[dim * 2 + 1]);
assert(final_dim_size % block_shape_buf[dim] == 0);
if (!(current >= 0 && current < shape_rank && in_shape.dim(current) == 1))
{
throw std::runtime_error(
- "The following conditions must be met: 0 <= dim < Shape rank, dim == 1");
+ "The following conditions must be met: 0 <= dim < Shape rank, dim == 1");
}
if (!should_squeeze[current])
{
if (multiplier_size != in_shape.rank())
{
- throw std::runtime_error("inferTileShape failed, input rank: " +
- std::to_string(in_shape.rank()) + ", bad multipliers size: " +
- std::to_string(multiplier_size) + "");
+ throw std::runtime_error(
+ "inferTileShape failed, input rank: " + std::to_string(in_shape.rank()) +
+ ", bad multipliers size: " + std::to_string(multiplier_size) + "");
}
ir::Shape new_Shape(in_shape.rank());
// initializing static member var
std::mutex TracingCtx::_session_id_mutex;
+uint32_t TracingCtx::_next_session_id = 0;
} // namespace util
} // namespace onert
+++ /dev/null
-../../../.clang-format.8
\ No newline at end of file
*
* @param file_path
*/
- void loadFromFile(const char *file_path);
+ void loadFromFile(const std::string &file_path);
/**
* @brief Load a model from a buffer
*
// Create operands form tflite::Tensor
ir::OperandIndex loadOperand(const Tensor *tensor, ir::Graph &subg);
- void loadSparsity(const Tensor *tensor, const ir::Shape &shape, ir::TypeInfo &typeInfo);
+ void loadQuantization(const Tensor *tensor, ir::TypeInfo &typeInfo);
+ void loadSparsity(const Tensor *tensor, ir::TypeInfo &typeInfo);
void loadOperationIO(const Operator *op, ir::OperandIndexSequence &inputs,
ir::OperandIndexSequence &outputs);
// Create operations from Operator
std::unique_ptr<Verifier> _verifier;
// Boolean flag to use MMAPED_DATA
bool _use_mmaped_data = false;
+
+ std::unordered_map<uint32_t /* Buffer Index in circle file */, std::shared_ptr<ir::Data>>
+ _buf_to_data;
};
template <typename LoaderDomain>
-void BaseLoader<LoaderDomain>::BaseLoader::loadFromFile(const char *file_path)
+void BaseLoader<LoaderDomain>::BaseLoader::loadFromFile(const std::string &file_path)
{
- _fd = open(file_path, O_RDONLY);
+ _fd = open(file_path.c_str(), O_RDONLY);
if (_fd < 0)
{
- throw std::runtime_error("Failed to open file " + std::string(file_path));
+ throw std::runtime_error("Failed to open file " + file_path);
}
struct stat file_stat;
if (fstat(_fd, &file_stat) != 0)
{
- throw std::runtime_error("Fstat failed or file " + std::string(file_path) +
- " is not a regular file");
+ throw std::runtime_error("Fstat failed or file " + file_path + " is not a regular file");
}
int size = file_stat.st_size;
// If app wants to change the input shape, call nnfw_apply_input_tensorinfo() can
// be used.
- // Type
- ir::DataType data_type = tensorTypeToDataType(tensor->type());
- // Quantization
- auto q_params = tensor->quantization();
- float scale = 0.0;
- long zero_point = 0;
- if (q_params != nullptr)
- {
- if (q_params->scale())
- {
- if (q_params->scale()->size() != 1)
- {
- throw std::runtime_error("Only 1 scale for a tensor is supported.");
- }
- scale = q_params->scale()->Get(0);
- }
-
- if (q_params->zero_point())
- {
- if (q_params->zero_point()->size() != 1)
- {
- throw std::runtime_error("Only 1 zero_point value for a tensor is supported.");
- }
- zero_point = q_params->zero_point()->Get(0);
- // zero_point is long while TypeInfo.zero_point is defined as int32_t.
- assert(zero_point >= std::numeric_limits<int32_t>::min());
- assert(zero_point <= std::numeric_limits<int32_t>::max());
- }
- auto details = q_params->details_as_CustomQuantization();
- if (details != nullptr)
- throw std::runtime_error("Custom Quantization is not supported");
- }
- // Create TypeInfo
- ir::TypeInfo type_info(data_type, scale, zero_point);
- // Sparsity
- loadSparsity(tensor, shape, type_info);
+ // TypeInfo
+ ir::TypeInfo type_info(tensorTypeToDataType(tensor->type()));
+ loadQuantization(tensor, type_info);
+ loadSparsity(tensor, type_info);
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
if (data != nullptr)
{
using std::ptrdiff_t;
- std::unique_ptr<ir::Data> data_obj;
+ std::shared_ptr<ir::Data> data_obj;
+
if (_fd == -1) // Model is from memory
{
- data_obj = std::make_unique<ir::ExternalData>(data->data(), data->size());
+ data_obj = std::make_shared<ir::ExternalData>(data->data(), data->size());
}
else // Model is loaded(mmap'd) from a file
{
ptrdiff_t aligned_offset_start = (unaligned_offset_start / _pagesize) * _pagesize;
size_t mmap_size = offset_end - aligned_offset_start;
- if (_use_mmaped_data)
+ uint32_t buf_idx = tensor->buffer();
+ auto buffer_found = _buf_to_data.find(buf_idx);
+
+ if (buffer_found != _buf_to_data.end())
+ {
+      // Another tensor points to this buffer, and its matching Data (either CachedData or
+      // MMapedData) was already created, so reuse that Data.
+ data_obj = buffer_found->second;
+ }
+ else if (_use_mmaped_data)
{
- data_obj = std::make_unique<ir::MMapedData>(_fd, aligned_offset_start, mmap_size,
+ data_obj = std::make_shared<ir::MMapedData>(_fd, aligned_offset_start, mmap_size,
unaligned_offset_start, data_size);
+ _buf_to_data[buf_idx] = data_obj;
}
else
{
size_t offset = unaligned_offset_start - aligned_offset_start;
uint8_t *mmap_base = static_cast<uint8_t *>(
mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start));
- data_obj = std::make_unique<ir::CachedData>(mmap_base + offset, data_size);
+
+ data_obj = std::make_shared<ir::CachedData>(mmap_base + offset, data_size);
+ _buf_to_data[buf_idx] = data_obj;
+
munmap(mmap_base, mmap_size);
}
}
}
template <typename LoaderDomain>
-void BaseLoader<LoaderDomain>::loadSparsity(const Tensor *tensor, const ir::Shape &shape,
- ir::TypeInfo &typeInfo)
+void BaseLoader<LoaderDomain>::loadQuantization(const Tensor *tensor, ir::TypeInfo &typeInfo)
+{
+ auto q_params = tensor->quantization();
+ if (q_params == nullptr || q_params->scale() == nullptr || q_params->scale()->size() == 0)
+ {
+ typeInfo.quantization(0., 0);
+ return;
+ }
+ if (q_params->zero_point() == nullptr)
+ {
+ throw std::runtime_error("Quantization params: scale is not null, but zero_point is null.");
+ }
+ const size_t num_scales = q_params->scale()->size();
+ if (num_scales != q_params->zero_point()->size())
+ {
+ throw std::runtime_error("Quantization params: scale size != zero_point size");
+ }
+ std::vector<float> scales;
+ std::vector<int32_t> zero_points;
+ scales.resize(num_scales);
+ zero_points.resize(num_scales);
+ for (size_t i = 0; i < num_scales; ++i)
+ {
+ scales[i] = q_params->scale()->Get(i);
+    // zero_point is defined as long (i64) in the schema, while TypeInfo's zero_point is int32_t.
+    // int64_t is used instead of long because long is 4 bytes on most 32-bit architectures.
+ int64_t zero_point = q_params->zero_point()->Get(i);
+ if (zero_point < std::numeric_limits<int32_t>::min() ||
+ zero_point > std::numeric_limits<int32_t>::max())
+ throw std::runtime_error("Zero_point is out of int32 range.");
+ zero_points[i] = static_cast<int32_t>(zero_point);
+ }
+ auto details = q_params->details_as_CustomQuantization();
+ if (details != nullptr)
+ throw std::runtime_error("Custom Quantization is not supported");
+ typeInfo.quantization(std::move(scales), std::move(zero_points));
+}
+
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSparsity(const Tensor *tensor, ir::TypeInfo &typeInfo)
{
auto src_sparsity = tensor->sparsity();
if (src_sparsity != nullptr)
}
}
// load metadata
- const int dim_metadata_size = src_sparsity->dim_metadata()->size();
- auto dense_rank = shape.rank();
+ const auto dim_metadata_size = src_sparsity->dim_metadata()->size();
+ const auto dense_rank = tensor->shape() ? tensor->shape()->size() : 0;
if (dense_rank + block_rank != dim_metadata_size)
throw std::runtime_error("sparsity dim_metadata length is wrong.");
bool random_sparsity = dim_metadata_size == 2 && block_rank == 0;
set(CIRCLE_LOADER_SOURCES src/circle_loader.cc)
-add_library(circle_loader SHARED ${CIRCLE_LOADER_SOURCES})
+add_library(circle_loader STATIC ${CIRCLE_LOADER_SOURCES})
+set_target_properties(circle_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(circle_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(circle_loader PRIVATE onert_core)
target_link_libraries(circle_loader PRIVATE base_loader nnfw_common nnfw_coverage)
target_link_libraries(circle_loader PRIVATE circle_schema)
-
-if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
- add_custom_command(TARGET circle_loader POST_BUILD
- COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:circle_loader>)
-endif()
-
-install(TARGETS circle_loader DESTINATION lib)
{
namespace circle_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename);
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size);
} // namespace circle_loader
} // namespace onert
subg->setLayout(convertDataFormat(circle_subg->data_format()));
- subg->finishBuilding();
+ subg->verify();
return subg;
}
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename)
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
{
auto subgraphs = std::make_unique<ir::Subgraphs>();
CircleLoader loader(subgraphs);
// ANeuralNetworksModel
//
ANeuralNetworksModel::ANeuralNetworksModel() noexcept
- : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false}
+ : _finished_building{false}, _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{
+ false}
{
_graph = std::make_shared<onert::ir::Graph>();
}
{
fillOptionalOperand();
- _graph->finishBuilding();
-
+ _graph->verify();
_operand_usages.clear();
+ _finished_building = true;
}
catch (const std::exception &e)
{
return true;
}
-bool ANeuralNetworksModel::isFinished() noexcept { return !_graph->isBuildingPhase(); }
+bool ANeuralNetworksModel::isFinished() noexcept { return _finished_building; }
bool ANeuralNetworksModel::isExistOperand(uint32_t index) noexcept
{
private:
std::shared_ptr<onert::ir::Graph> _graph;
+ bool _finished_building;
std::unordered_set<onert::ir::OperandIndex> _optional_operands;
std::vector<OperandUsage> _operand_usages;
bool _allowFloat32toFloat16;
set(TFLITE_LOADER_SOURCES src/tflite_loader.cc)
-add_library(tflite_loader SHARED ${TFLITE_LOADER_SOURCES})
+add_library(tflite_loader STATIC ${TFLITE_LOADER_SOURCES})
+set_target_properties(tflite_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(tflite_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(tflite_loader PRIVATE onert_core)
target_link_libraries(tflite_loader PRIVATE base_loader nnfw_common nnfw_coverage)
-
-if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
- add_custom_command(TARGET tflite_loader POST_BUILD
- COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:tflite_loader>)
-endif()
-
-install(TARGETS tflite_loader DESTINATION lib)
namespace tflite_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename);
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
} // namespace tflite_loader
} // namespace onert
loadOperation(op, *subg);
}
- subg->finishBuilding();
+ subg->verify();
return subg;
}
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename)
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
{
auto subgraphs = std::make_unique<ir::Subgraphs>();
TFLiteLoader loader(subgraphs);
+++ /dev/null
-../../../.clang-format.8
\ No newline at end of file
+++ /dev/null
-../../../.clang-format.8
\ No newline at end of file
bool supportFP16() override { return false; }
};
+class MockBackendContext : public BackendContext
+{
+public:
+ using BackendContext::BackendContext;
+ ITensorRegistry *genTensors() override { return nullptr; }
+ FunctionMap genKernels() override { return {}; }
+};
+
struct MockBackendCPU : public Backend
{
std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
{
- return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
}
};
struct MockBackendGPU : public Backend
{
std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
{
- return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
}
};
struct MockBackendNPU : public Backend
{
std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
{
- return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
}
};
BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
- graph->finishBuilding();
+ graph->verify();
return graph;
}
BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
- graph->finishBuilding();
+ graph->verify();
return graph;
}
setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
}
- backend::BackendContexts buildBackendContexts(const Graph &graph)
- {
- backend::BackendContexts contexts;
- for (auto backend : _mock_backends)
- {
- contexts.emplace(backend, backend->newContext(graph, nullptr, false));
- }
- return contexts;
- }
-
const MockBackendCPU *_cpu_backend{nullptr};
const MockBackendGPU *_gpu_backend{nullptr};
const MockBackendNPU *_npu_backend{nullptr};
et.storeOperationsExecTime();
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
et.storeOperationsExecTime();
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
et.storeOperationsExecTime();
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
et.storeOperationsExecTime();
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
et.storeOperationsExecTime();
// Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
+ auto scheduler =
+ compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
const auto br = scheduler.schedule(*graph);
ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
br->getBackend(mul1_op_idx)->config()->id());
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include "ir/Graph.h"
+#include "compiler/pass/UnusedOperandEliminationPass.h"
+
+using namespace onert::ir;
+using namespace onert::compiler::pass;
+
+TEST(UnusedOperandEliminationPass, Simple)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ auto unused = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+
+ UnusedOperandEliminationPass{graph}.run();
+
+ ASSERT_TRUE(graph.operands().exist(in));
+ ASSERT_TRUE(graph.operands().exist(out));
+ ASSERT_FALSE(graph.operands().exist(unused));
+}
graph->addInput(operand_lhs);
graph->addInput(operand_rhs1);
graph->addOutput(operand_result2);
- graph->finishBuilding();
+ graph->verify();
// Compile
auto subgs = std::make_shared<onert::ir::Subgraphs>();
{
return std::make_shared<MockConfig>();
}
- std::unique_ptr<BackendContext> newContext(const ir::Graph &,
- const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&) const override
{
return nullptr;
}
_graph->getInputs().append(operand_rhs);
_graph->getOutputs().append(operand_result);
- _graph->finishBuilding();
+ _graph->verify();
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, _graph);
_graph->getInputs().append(operand_rhs1);
_graph->getOutputs().append(operand_result2);
- _graph->finishBuilding();
+ _graph->verify();
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, _graph);
_graph->getInputs().append(operand_rhs);
_graph->getOutputs().append(operand_result);
- _graph->finishBuilding();
+ _graph->verify();
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, _graph);
TEST_F(InterpExecutorTest, create_empty)
{
Graph graph;
- graph.finishBuilding();
+ graph.verify();
auto executor = std::make_unique<InterpExecutor>(graph);
ASSERT_NE(executor, nullptr);
}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+#include "ir/verifier/Verifier.h"
+
+TEST(Graph, neg_inputs_and_outputs)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::OperandIndex index0{0u};
+ onert::ir::OperandIndex index1{1u};
+
+ graph.addInput({index0});
+ graph.addInput({index1});
+
+ onert::ir::OperandIndex index10{10u};
+ onert::ir::OperandIndex index11{11u};
+ onert::ir::OperandIndex index12{12u};
+
+ graph.addOutput({index10});
+ graph.addOutput({index11});
+ graph.addOutput({index12});
+
+ ASSERT_EQ(graph.getInputs().size(), 2);
+ ASSERT_EQ(graph.getOutputs().size(), 3);
+
+ onert::ir::IOIndex io_index0{0};
+ onert::ir::IOIndex io_index1{1};
+ onert::ir::IOIndex io_index2{2};
+
+ ASSERT_EQ(graph.getInputs().at(io_index0), 0);
+ ASSERT_EQ(graph.getInputs().at(io_index1), 1);
+
+ ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
+ ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
+ ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
+
+ EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range);
+}
+
+using namespace onert::ir;
+
+OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs,
+ const OperandIndexSequence outputs)
+{
+ // Add "ADD" operation
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param.activation = Activation::NONE;
+ return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param));
+}
+
+TEST(Graph, OneOpGraphSimpleValid)
+{
+ // Simple Graph with just one Add operation
+
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ addAddOperation(graph, {lhs, rhs}, {res});
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ graph.verify();
+
+ SUCCEED();
+}
+
+TEST(Graph, neg_InvalidGraph_BadInput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+  graph.addInput(OperandIndex{89}); // Non-existing operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidGraph_BadOutput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+  graph.addOutput(OperandIndex{12}); // Non-existing operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidAddOperation_BadInputIndex)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid());
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/LayoutSet.h"
+
+using onert::ir::Layout;
+using onert::ir::LayoutSet;
+
+TEST(ir_LayoutSet, neg_add_remove)
+{
+ LayoutSet set{Layout::NCHW};
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 2);
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+}
+
+TEST(ir_LayoutSet, neg_add_twice)
+{
+ LayoutSet set;
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+}
+
+TEST(ir_LayoutSet, set_operators)
+{
+ LayoutSet set1{Layout::NCHW};
+ LayoutSet set2{Layout::NHWC};
+ LayoutSet set3 = set1 | set2;
+
+ ASSERT_EQ(set3.size(), 2);
+
+ ASSERT_EQ((set3 - set1).size(), 1);
+ ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
+ ASSERT_EQ((set3 - set2).size(), 1);
+ ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 - set3).size(), 0);
+
+ ASSERT_EQ((set3 & set1).size(), 1);
+ ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 & set2).size(), 1);
+ ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
+ ASSERT_EQ((set1 & set2).size(), 0);
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TEST_GRAPH_MOCK_NODE_H__
+#define __ONERT_TEST_GRAPH_MOCK_NODE_H__
+
+#include "ir/Operation.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert_test
+{
+namespace ir
+{
+
+class SimpleMock : public onert::ir::Operation
+{
+public:
+ SimpleMock(const onert::ir::OperandIndexSequence &inputs,
+ const onert::ir::OperandIndexSequence &outputs)
+ : Operation{onert::ir::OperandConstraint::createAny()}
+ {
+ setInputs(inputs);
+ setOutputs(outputs);
+ }
+
+public:
+ void accept(onert::ir::OperationVisitor &) const override {}
+ onert::ir::OpCode opcode() const final { return onert::ir::OpCode::Invalid; }
+};
+
+} // namespace ir
+} // namespace onert_test
+
+#endif // __ONERT_TEST_GRAPH_MOCK_NODE_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/OperandIndexSequence.h"
+
+using onert::ir::OperandIndex;
+using onert::ir::OperandIndexSequence;
+
+TEST(ir_OperandIndexSequence, neg_append)
+{
+ OperandIndexSequence iset{0, 2, 4, 8};
+
+ ASSERT_EQ(iset.size(), 4);
+
+ iset.append(OperandIndex{10});
+
+ ASSERT_EQ(iset.size(), 5);
+
+ onert::ir::IOIndex index1{1};
+ onert::ir::IOIndex index2{4};
+
+ ASSERT_EQ(iset.at(index1), 2);
+ ASSERT_EQ(iset.at(index2), 10);
+
+ ASSERT_TRUE(iset.contains(OperandIndex{2}));
+ ASSERT_TRUE(iset.contains(OperandIndex{10}));
+ ASSERT_FALSE(iset.contains(OperandIndex{11}));
+}
+
+TEST(graph_OperandIndexSequence, neg_replace)
+{
+ OperandIndexSequence iset{0, 1, 2, 3};
+
+ iset.replace(OperandIndex{1}, OperandIndex{9});
+ ASSERT_FALSE(iset.contains(OperandIndex{1}));
+ ASSERT_TRUE(iset.contains(OperandIndex{9}));
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Operands.h"
+
+TEST(ir_Operands, neg_set_test)
+{
+ onert::ir::Operands set;
+
+ onert::ir::Shape shape0{1, 2, 3};
+
+ onert::ir::Shape shape1(4);
+ shape1.dim(0) = 10;
+ shape1.dim(1) = 20;
+ shape1.dim(2) = 30;
+ shape1.dim(3) = 40;
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ set.emplace(shape0, type);
+ set.emplace(shape1, type);
+
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
+
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "MockNode.h"
+#include "ir/Operations.h"
+
+using onert::ir::Operation;
+using onert::ir::OperationIndex;
+using onert::ir::Operations;
+
+TEST(ir_Operations, basic)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{0u};
+ ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
+ ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
+}
+
+TEST(ir_Operations, neg_at)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{99u};
+ EXPECT_THROW(ops.at(idx), std::out_of_range);
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/operation/Conv2D.h"
+#include "ir/operation/Concat.h"
+
+#include <memory>
+
+#include <stdexcept>
+
+using Index = onert::ir::IOIndex;
+using IndexSet = onert::ir::OperandIndexSequence;
+
+TEST(ir_Operation_setIO, operation_setIO_conv)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Add Conv
+ using Graph = onert::ir::operation::Conv2D;
+
+ auto input_operand = graph.addOperand(shape, type);
+ auto kernel_operand = graph.addOperand(shape, type);
+ auto bias_operand = graph.addOperand(shape, type);
+ IndexSet inputs{input_operand, kernel_operand, bias_operand};
+
+ Graph::Param conv_params;
+ conv_params.padding.type = onert::ir::PaddingType::SAME;
+ conv_params.stride.horizontal = 1;
+ conv_params.stride.vertical = 1;
+ conv_params.activation = onert::ir::Activation::NONE;
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
+
+ ASSERT_NE(conv, nullptr);
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ conv->setInputs({8, 9, 10});
+ ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
+}
+
+TEST(ir_Operation_setIO, neg_operation_setIO_concat)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ using Graph = onert::ir::operation::Concat;
+
+ // Add Concat
+ IndexSet inputs;
+ for (int i = 0; i < 6; ++i)
+ {
+ inputs.append(graph.addOperand(shape, type));
+ }
+
+ Graph::Param concat_params{0};
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
+
+ ASSERT_NE(concat, nullptr);
+ ASSERT_EQ(concat->getInputs().size(), 6);
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+
+ concat->setInputs({80, 6, 9, 11});
+ ASSERT_EQ(concat->getInputs().size(), 4);
+ ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
+ ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
+ ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <ir/Shape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeTest, basic_test)
+{
+ {
+ onert::ir::Shape shape(3);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = 2;
+ shape.dim(2) = 3;
+
+ ASSERT_EQ(shape.rank(), 3);
+ ASSERT_EQ(shape.num_elements(), 6);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+ {
+ onert::ir::Shape shape; // scalar or rank is unspecified
+
+ ASSERT_EQ(shape.rank(), 0);
+ ASSERT_EQ(shape.num_elements(), 1);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+}
+
+TEST(ShapeTest, neg_basic_test)
+{
+ {
+ onert::ir::Shape shape(2);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
+
+ ASSERT_EQ(shape.rank(), 2);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), true);
+ EXPECT_ANY_THROW(shape.num_elements());
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Graph.h"
+#include "ir/verifier/Verifier.h"
+#include <memory>
+#include "MockNode.h"
+
+#include <typeindex>
+
+namespace
+{
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+} // namespace
+
+TEST(ir_Operand, neg_usedef)
+{
+ onert::ir::Graph graph;
+ onert::ir::verifier::DAGChecker verifier;
+
+ onert::ir::Shape shape(3);
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Model Input/Output
+ auto input_operand = graph.addOperand(shape, type);
+ auto output_operand = graph.addOperand(shape, type);
+
+ graph.addInput(input_operand);
+ graph.addOutput(output_operand);
+
+ // MockNode1
+ auto operand_index1 = graph.addOperand(shape, type);
+ auto mocknode_index1 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+
+ // MockNode2
+ auto operand_index2 = graph.addOperand(shape, type);
+ auto mocknode_index2 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+
+ // MockNode3(two input)
+ auto multiinput_index = graph.addOperation(
+ std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+
+ graph.verify();
+
+ ASSERT_TRUE(verifier.verify(graph));
+
+ // Check def
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
+ ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
+ ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
+
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
+
+ // Check use
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
+ ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
+
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
+ ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Operation.h"
+#include "ir/Graph.h"
+#include "ir/verifier/Verifier.h"
+#include <memory>
+#include "ir/Operand.h"
+#include "MockNode.h"
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+TEST(Verifier, dag_checker)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
+
+ onert::ir::verifier::DAGChecker verifier;
+
+ ASSERT_TRUE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_1)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_2)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto mock_op_ptr = mock_op.get();
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "util/Index.h"
+
+using Index = ::onert::util::Index<uint32_t, struct TestTag>;
+
+TEST(Index, neg_index_test)
+{
+ Index idx1{1u};
+ Index idx2{2u};
+ Index idx3{idx1};
+
+ ASSERT_EQ(idx1, 1);
+ ASSERT_EQ(idx1, 1u);
+ ASSERT_EQ(idx1.value(), 1u);
+ ASSERT_NE(idx1, idx2);
+ ASSERT_EQ(idx1, idx3);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "util/ObjectManager.h"
+#include "util/Index.h"
+
+using namespace onert;
+
+struct TestTag;
+using Index = typename util::Index<uint32_t, TestTag>;
+
+TEST(ObjectManager, emplace)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index = man.emplace(100);
+ ASSERT_EQ(man.at(index), 100);
+}
+
+TEST(ObjectManager, neg_remove_1)
+{
+ util::ObjectManager<Index, int> man;
+
+ Index index = man.emplace(100);
+ ASSERT_TRUE(man.exist(index));
+ ASSERT_EQ(man.at(index), 100);
+
+ man.remove(index);
+ ASSERT_FALSE(man.exist(index));
+}
+
+TEST(ObjectManager, neg_remove_2)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ ASSERT_TRUE(man.exist(index0));
+ ASSERT_EQ(man.at(index0), 100);
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+
+ man.remove(index0);
+ ASSERT_FALSE(man.exist(index0));
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+}
+
+TEST(ObjectManager, push)
+{
+ util::ObjectManager<Index, int> man;
+
+  // Push without specifying an index
+ auto index = man.push(std::make_unique<int>(100));
+ ASSERT_EQ(man.at(index), 100);
+
+  // Push with an explicitly specified index
+ auto index2 = man.push(std::make_unique<int>(200), Index{33});
+ ASSERT_EQ(index2.value(), 33);
+ ASSERT_EQ(man.at(index2), 200);
+
+ auto index3 = man.push(std::make_unique<int>(300));
+  // NOTE The auto-generated index is always (the biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index3.value(), 34);
+ ASSERT_EQ(man.at(index3), 300);
+
+ auto index4 = man.push(std::make_unique<int>(400), Index{22});
+ ASSERT_EQ(index4.value(), 22);
+ ASSERT_EQ(man.at(index4), 400);
+
+ auto index5 = man.push(std::make_unique<int>(500));
+  // NOTE The auto-generated index is always (the biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index5.value(), 35);
+ ASSERT_EQ(man.at(index5), 500);
+}
+
+TEST(ObjectManager, neg_push)
+{
+ util::ObjectManager<Index, int> man;
+
+  // Push with an explicitly specified index
+ auto index = man.push(std::make_unique<int>(100), Index{55});
+ ASSERT_EQ(index.value(), 55);
+ ASSERT_EQ(man.at(index), 100);
+
+  // Pushing with the same index again must fail
+ auto index2 = man.push(std::make_unique<int>(200), Index{55});
+ ASSERT_FALSE(index2.valid());
+}
+
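+// NOTE The maximum uint32_t value is presumably reserved as the "undefined index" sentinel,
+// so pushing with it must fail and (kMaxUInt32 - 1) is the largest usable index.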
+static const uint32_t kMaxUInt32 = std::numeric_limits<uint32_t>::max();
+
+TEST(ObjectManager, neg_push_undefined_index)
+{
+ util::ObjectManager<Index, int> man;
+
+  // Try inserting with an invalid (undefined) index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32});
+ ASSERT_FALSE(index.valid());
+ ASSERT_EQ(man.size(), 0);
+}
+
+TEST(ObjectManager, neg_push_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+  // Insert an object with the maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+  // The final index has been reached, so the next push/emplace must fail
+ auto index2 = man.push(std::make_unique<int>(200));
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index2.valid());
+}
+
+TEST(ObjectManager, neg_emplace_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+  // Insert an object with the maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+  // The final index has been reached, so the next push/emplace must fail
+ auto index3 = man.emplace(200);
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index3.valid());
+}
+
+TEST(ObjectManager, const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ int sum = 0;
+ man.iterate([&](const Index &index, const int &val) { sum += val; });
+ ASSERT_EQ(sum, 600);
+}
+
+TEST(ObjectManager, non_const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ man.iterate([&](const Index &index, int &val) { val += 1; });
+ ASSERT_EQ(man.at(index0), 101);
+ ASSERT_EQ(man.at(index1), 201);
+ ASSERT_EQ(man.at(index2), 301);
+}
+
+TEST(ObjectManager, set)
+{
+ util::ObjectManager<Index, int> man;
+ auto index = man.set(Index{1}, std::make_unique<int>(100)); // Insert
+ ASSERT_EQ(index, Index{1});
+ auto index2 = man.set(index, std::make_unique<int>(200)); // Overwrite
+ ASSERT_EQ(index2, index);
+ ASSERT_EQ(man.at(index2), 200);
+}
+
+TEST(ObjectManager, neg_set)
+{
+ auto v = std::make_unique<int>(100);
+ util::ObjectManager<Index, int> man;
+  auto index = man.set(Index{}, std::move(v)); // Try to set with an invalid index
+ ASSERT_EQ(index, Index{});
+ ASSERT_FALSE(index.valid());
+  ASSERT_NE(v, nullptr); // v must be kept on failure
+}
+
+TEST(ObjectManager, getRawPtr)
+{
+ auto v = std::make_unique<int>(100);
+ auto v_ptr = v.get();
+ util::ObjectManager<Index, int> man;
+ auto index = man.push(std::move(v));
+ ASSERT_EQ(v_ptr, man.getRawPtr(index));
+}
+
+TEST(ObjectManager, neg_getRawPtr)
+{
+ util::ObjectManager<Index, int> man;
+ auto ptr = man.getRawPtr(Index{1});
+ ASSERT_EQ(ptr, nullptr);
+}
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ir/Layout.h"
+#include "util/ShapeInference.h"
+
+using namespace onert::ir;
+
+TEST(ShapeInference, Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{3};
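+  // The rank-1 rhs broadcasts against the last dimension of lhs, so the result keeps lhs_shape.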
+ auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.dim(0), 1);
+ ASSERT_EQ(infered_out_shape.dim(1), 299);
+ ASSERT_EQ(infered_out_shape.dim(2), 299);
+ ASSERT_EQ(infered_out_shape.dim(3), 3);
+}
+
+TEST(ShapeInference, neg_Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{5, 3};
+ ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
+}
+
+TEST(ShapeInference, Pool2DNodeSame)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::SAME};
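+  // Expected spatial dims follow the usual SAME-padding rule, out = ceil(in / stride):
+  // H = ceil(6 / 3) = 2, W = ceil(12 / 7) = 2.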
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeValid)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::VALID};
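+  // Expected spatial dims follow the usual VALID-padding rule, out = (in - kernel) / stride + 1:
+  // H = (6 - 3) / 3 + 1 = 2, W = (12 - 6) / 7 + 1 = 1.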
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeExplicit)
+{
+ Shape in_shape{10, 3, 5, 20};
+
+ Stride stride{3, 7};
+ Padding padding{4, 3, 2, 1};
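+  // With explicit padding, out = (in + pads - kernel) / stride + 1; assuming the pads sum to
+  // 3 for H and 7 for W: H = (3 + 3 - 3) / 3 + 1 = 2, W = (5 + 7 - 6) / 7 + 1 = 1.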
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{0, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Conv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
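+  // ker_shape is presumably laid out as {out_channels, kernel_h, kernel_w, in_channels},
+  // i.e. a 3x6 kernel producing 30 output channels.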
+
+ operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param =
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+}
+
+TEST(ShapeInference, neg_Conv2D_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, DepthwiseConv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
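+  // ker_shape is presumably {1, kernel_h, kernel_w, in_channels * multiplier};
+  // with 20 input channels and multiplier 3 the output has 60 channels.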
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ auto infered_out_shape =
+ onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+}
+
+TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Concat)
+{
+ {
+ Shape in1{10, 20, 30, 3, 50};
+ Shape in2{10, 20, 30, 2, 50};
+ Shape in3{10, 20, 30, 2, 50};
+
+ operation::Concat::Param param{3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
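+    // Concatenation along axis 3: 3 + 2 + 2 = 7; all other dims must match.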
+
+ ASSERT_EQ(infered_out_shape.rank(), 5);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 30);
+ ASSERT_EQ(infered_out_shape.dim(3), 7);
+ ASSERT_EQ(infered_out_shape.dim(4), 50);
+ }
+ {
+    // case 1: axis < 0 (here -1 resolves to axis 2)
+ Shape in1{10, 20, 2};
+ Shape in2{10, 20, 3};
+
+ operation::Concat::Param param{-1};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 5);
+ }
+ {
+    // case 2: axis < 0 (here -3 resolves to axis 0)
+ Shape in1{2, 20, 2};
+ Shape in2{3, 20, 2};
+
+ operation::Concat::Param param{-3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 5);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 2);
+ }
+}
+
+TEST(ShapeInference, neg_Concat)
+{
+ {
+ operation::Concat::Param param{2};
+ Shape in1{10, 1, 3};
+    Shape in2{10, 2, 4}; // dim[1] should be 1 but is 2
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+ { // wrong rank
+ operation::Concat::Param param{2};
+ Shape in1{10, 2, 3, 4};
+ Shape in2{10, 2, 4}; // rank should be 4
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+}
+
+TEST(ShapeInference, ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ auto check = [&](int32_t axis, Shape &expected) {
+ auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
+
+ ASSERT_EQ(actual.rank(), 3);
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
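+  // ExpandDims inserts a dimension of size 1 at 'axis'; valid axes are [-(rank + 1), rank],
+  // i.e. [-3, 2] for this rank-2 input.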
+ { // boundary
+ int32_t axis = 0;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+ { // boundary
+ int32_t axis = 2;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // inside
+ int32_t axis = 1;
+ Shape expected{30, 1, 40};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -1;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -3;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+}
+
+TEST(ShapeInference, neg_ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ { // over boundary
+ int32_t axis = 3;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+ { // over boundary
+ int32_t axis = -4;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, FullyConnected)
+{
+ Shape in_shape{3, 4, 5, 6};
+ Shape ker_shape{3, 10};
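+  // The input is presumably flattened to [num_elements / weight_cols, weight_cols]:
+  // 3 * 4 * 5 * 6 = 360 elements / 10 = 36 rows, with 3 output units.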
+ auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 2);
+ ASSERT_EQ(infered_out_shape.dim(0), 36);
+ ASSERT_EQ(infered_out_shape.dim(1), 3);
+}
+
+TEST(ShapeInference, Transpose)
+{
+ auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
+ // pre-conditions
+ ASSERT_EQ(in_shape.rank(), perm.size());
+ ASSERT_EQ(expected.rank(), perm.size());
+ auto inferred_out_shape =
+ onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
+ // post-conditions
+ ASSERT_EQ(inferred_out_shape.rank(), perm.size());
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ {
+ ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
+ }
+ };
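+  // Transposed output dim i equals input dim perm[i], e.g. perm {1, 0} maps {2, 3} to {3, 2}.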
+ // check for 2-D
+ {
+ Shape in_shape{2, 3};
+ std::vector<int> perm = {1, 0};
+ Shape expected{3, 2};
+ // int32_t rank = 2;
+ check(in_shape, perm, expected);
+ }
+ // check for 3-D
+ {
+ Shape in_shape{1, 2, 3};
+ std::vector<int> perm = {2, 0, 1};
+ Shape expected{3, 1, 2};
+ // int32_t rank = 3;
+ check(in_shape, perm, expected);
+ }
+ // check for 4-D
+ {
+ Shape in_shape{1, 2, 3, 4};
+ std::vector<int> perm = {1, 3, 0, 2};
+ Shape expected{2, 4, 1, 3};
+ // int32_t rank = 4;
+ check(in_shape, perm, expected);
+ }
+}
+
+TEST(ShapeInference, neg_Transpose)
+{
+ Shape in_shape{1, 2, 3};
+ // Invalid parameter size
+ {
+ std::vector<int> perm = {2, 0, 1, 0};
+ // int32_t rank = 3;
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+ // Invalid parameter value
+ {
+ std::vector<int> perm = {2, 0, 3};
+ // int32_t rank = 3;
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, Gather)
+{
+ auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
+ int rank = input.rank();
+ auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
+
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
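+  // Gather presumably replaces the 'axis' dimension of the input with the full indices shape,
+  // e.g. {3, 4} gathered at axis 0 with indices {1, 1, 2} gives {1, 1, 2, 4}.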
+ // check for 2-D, 3-D, axis 0
+ {
+ Shape input{3, 4};
+ Shape indices{1, 1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 1, 2, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 2-D, 3-D, axis 1
+ {
+ Shape input{3, 4};
+ Shape indices{1, 2, 1};
+ int32_t axis = 1;
+ Shape expected{3, 1, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 0
+ {
+ Shape input{2, 3, 4};
+ Shape indices{1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 2
+ {
+ Shape input{2, 3, 4};
+ Shape indices{2, 1};
+ int32_t axis = 2;
+ Shape expected{2, 3, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 4D, axis 0
+ {
+ Shape input{1, 2, 3, 4};
+ Shape indices{2};
+ int32_t axis = 0;
+ Shape expected{2, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+}
+
+TEST(ShapeInference, BCQFullyConnected)
+{
+ auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
+ Shape &expected) {
+ auto actual =
+ onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
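+  // The expected row count appears to be the sum of the per-cluster sizes (the second value of
+  // each pair in 'cluster'): 10 + 10 + 10 = 30 below, and 50 in the second case.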
+ {
+ Shape in_shape{10, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+
+ Shape expected{30, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+
+ {
+ Shape in_shape{1, 1};
+ Shape cluster_shape{1, 2};
+ std::vector<int> cluster = {3, 50};
+
+ Shape expected{50, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+}
+
+TEST(ShapeInference, BCQGather)
+{
+ auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
+ uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
+ operation::BCQGather::Param param{hidden_size, axis};
+ auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
+ cluster.data(), rank, param);
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
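+  // With axis 0, hidden_size (10) appears to be appended to the indices shape:
+  // {5, 1} -> {5, 1, 10}; with axis 1, the summed cluster size (30) leads: {30, 5, 1}.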
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 0;
+ int rank = 2;
+
+ Shape expected{5, 1, 10};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 1;
+ int rank = 2;
+
+ Shape expected{30, 5, 1};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/operation/BinaryArithmetic.h"
-#include "ir/verifier/Verifier.h"
-
-TEST(Graph, neg_inputs_and_outputs)
-{
- onert::ir::Graph graph;
-
- onert::ir::OperandIndex index0{0u};
- onert::ir::OperandIndex index1{1u};
-
- graph.addInput({index0});
- graph.addInput({index1});
-
- onert::ir::OperandIndex index10{10u};
- onert::ir::OperandIndex index11{11u};
- onert::ir::OperandIndex index12{12u};
-
- graph.addOutput({index10});
- graph.addOutput({index11});
- graph.addOutput({index12});
-
- ASSERT_EQ(graph.getInputs().size(), 2);
- ASSERT_EQ(graph.getOutputs().size(), 3);
-
- onert::ir::IOIndex io_index0{0};
- onert::ir::IOIndex io_index1{1};
- onert::ir::IOIndex io_index2{2};
-
- ASSERT_EQ(graph.getInputs().at(io_index0), 0);
- ASSERT_EQ(graph.getInputs().at(io_index1), 1);
-
- ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
- ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
- ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
-
- EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range);
-}
-
-using namespace onert::ir;
-
-OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs,
- const OperandIndexSequence outputs)
-{
- // Add "ADD" operation
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param));
-}
-
-TEST(Graph, OneOpGraphFinish)
-{
- // Simple Graph with just one Add operation
-
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto lhs = graph.addOperand(shape, type);
- auto rhs = graph.addOperand(shape, type);
- auto res = graph.addOperand(shape, type);
-
- addAddOperation(graph, {lhs, rhs}, {res});
-
- // Set model inputs/outputs
- graph.addInput(lhs);
- graph.addInput(rhs);
- graph.addOutput(res);
-
- graph.finishBuilding();
-
- SUCCEED();
-}
-
-TEST(Graph, neg_InvalidGraphFinish_BadInput)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto in = graph.addOperand(shape, type);
- auto out = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(in);
- graph.addOutput(out);
- graph.addInput(OperandIndex{89}); // Non-exisiting operand!
-
- EXPECT_ANY_THROW(graph.finishBuilding());
-}
-
-TEST(Graph, neg_InvalidGraphFinish_BadOutput)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto in = graph.addOperand(shape, type);
- auto out = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(in);
- graph.addOutput(out);
- graph.addOutput(OperandIndex{12}); // Non-exisiting operand!
-
- EXPECT_ANY_THROW(graph.finishBuilding());
-}
-
-TEST(Graph, neg_InvalidGraphFinish_BadInputOutputForOp)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto lhs = graph.addOperand(shape, type);
- auto rhs = graph.addOperand(shape, type);
- auto res = graph.addOperand(shape, type);
-
- addAddOperation(graph, {lhs, OperandIndex{99}}, {res});
-
- // Set model inputs/outputs
- graph.addInput(lhs);
- graph.addInput(rhs);
- graph.addOutput(res);
-
- EXPECT_ANY_THROW(graph.finishBuilding());
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/Index.h"
-
-using Index = ::onert::util::Index<uint32_t, struct TestTag>;
-
-TEST(Index, neg_index_test)
-{
- Index idx1{1u};
- Index idx2{2u};
- Index idx3{idx1};
-
- ASSERT_EQ(idx1, 1);
- ASSERT_EQ(idx1, 1u);
- ASSERT_EQ(idx1.value(), 1u);
- ASSERT_NE(idx1, idx2);
- ASSERT_EQ(idx1, idx3);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_TEST_GRAPH_MOCK_NODE_H__
-#define __ONERT_TEST_GRAPH_MOCK_NODE_H__
-
-#include "ir/Operation.h"
-#include "ir/OperandIndexSequence.h"
-
-namespace onert_test
-{
-namespace ir
-{
-
-class SimpleMock : public onert::ir::Operation
-{
-public:
- SimpleMock(const onert::ir::OperandIndexSequence &inputs,
- const onert::ir::OperandIndexSequence &outputs)
- : Operation{onert::ir::OperandConstraint::createAny()}
- {
- setInputs(inputs);
- setOutputs(outputs);
- }
-
-public:
- void accept(onert::ir::OperationVisitor &) const override {}
- onert::ir::OpCode opcode() const final { return onert::ir::OpCode::Invalid; }
-};
-
-} // namespace ir
-} // namespace onert_test
-
-#endif // __ONERT_TEST_GRAPH_MOCK_NODE_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/OperandIndexSequence.h"
-
-using onert::ir::OperandIndex;
-using onert::ir::OperandIndexSequence;
-
-TEST(ir_OperandIndexSequence, neg_append)
-{
- OperandIndexSequence iset{0, 2, 4, 8};
-
- ASSERT_EQ(iset.size(), 4);
-
- iset.append(OperandIndex{10});
-
- ASSERT_EQ(iset.size(), 5);
-
- onert::ir::IOIndex index1{1};
- onert::ir::IOIndex index2{4};
-
- ASSERT_EQ(iset.at(index1), 2);
- ASSERT_EQ(iset.at(index2), 10);
-
- ASSERT_TRUE(iset.contains(OperandIndex{2}));
- ASSERT_TRUE(iset.contains(OperandIndex{10}));
- ASSERT_FALSE(iset.contains(OperandIndex{11}));
-}
-
-TEST(graph_OperandIndexSequence, neg_replace)
-{
- OperandIndexSequence iset{0, 1, 2, 3};
-
- iset.replace(OperandIndex{1}, OperandIndex{9});
- ASSERT_FALSE(iset.contains(OperandIndex{1}));
- ASSERT_TRUE(iset.contains(OperandIndex{9}));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/LayoutSet.h"
-
-using onert::ir::Layout;
-using onert::ir::LayoutSet;
-
-TEST(ir_LayoutSet, neg_add_remove)
-{
- LayoutSet set{Layout::NCHW};
- set.remove(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
- set.add(Layout::NHWC);
- ASSERT_EQ(set.size(), 2);
- set.remove(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
- set.remove(Layout::NCHW);
- ASSERT_EQ(set.size(), 0);
- set.remove(Layout::NCHW);
- ASSERT_EQ(set.size(), 0);
-}
-
-TEST(ir_LayoutSet, set_operators)
-{
- LayoutSet set1{Layout::NCHW};
- LayoutSet set2{Layout::NHWC};
- LayoutSet set3 = set1 | set2;
-
- ASSERT_EQ(set3.size(), 2);
-
- ASSERT_EQ((set3 - set1).size(), 1);
- ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
- ASSERT_EQ((set3 - set2).size(), 1);
- ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 - set3).size(), 0);
-
- ASSERT_EQ((set3 & set1).size(), 1);
- ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 & set2).size(), 1);
- ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
- ASSERT_EQ((set1 & set2).size(), 0);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operands.h"
-
-TEST(ir_Operands, neg_set_test)
-{
- onert::ir::Operands set;
-
- onert::ir::Shape shape0{1, 2, 3};
-
- onert::ir::Shape shape1(4);
- shape1.dim(0) = 10;
- shape1.dim(1) = 20;
- shape1.dim(2) = 30;
- shape1.dim(3) = 40;
-
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- set.emplace(shape0, type);
- set.emplace(shape1, type);
-
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
-
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
-#include "../MockNode.h"
-
-#include <typeindex>
-
-namespace
-{
-
-using IndexSet = onert::ir::OperandIndexSequence;
-using Mock = onert_test::ir::SimpleMock;
-
-} // namespace
-
-TEST(ir_Operand, neg_usedef)
-{
- onert::ir::Graph graph;
- onert::ir::verifier::DAGChecker verifier;
-
- onert::ir::Shape shape(3);
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- // Model Input/Output
- auto input_operand = graph.addOperand(shape, type);
- auto output_operand = graph.addOperand(shape, type);
-
- graph.addInput(input_operand);
- graph.addOutput(output_operand);
-
- // MockNode1
- auto operand_index1 = graph.addOperand(shape, type);
- auto mocknode_index1 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
-
- // MockNode2
- auto operand_index2 = graph.addOperand(shape, type);
- auto mocknode_index2 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
-
- // MockNode3(two input)
- auto multiinput_index = graph.addOperation(
- std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
-
- graph.finishBuilding();
-
- ASSERT_TRUE(verifier.verify(graph));
-
- // Check def
- ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
- ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
- ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
-
- ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
- ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
-
- // Check use
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
- ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
-
- ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
- ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "../MockNode.h"
-#include "ir/Operations.h"
-
-using onert::ir::Operation;
-using onert::ir::OperationIndex;
-using onert::ir::Operations;
-
-TEST(ir_Operations, basic)
-{
- Operations ops;
- ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
- OperationIndex idx{0u};
- ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
- ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
-}
-
-TEST(ir_Operations, neg_at)
-{
- Operations ops;
- ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
- OperationIndex idx{99u};
- EXPECT_THROW(ops.at(idx), std::out_of_range);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/Concat.h"
-
-#include <memory>
-
-#include <stdexcept>
-
-using Index = onert::ir::IOIndex;
-using IndexSet = onert::ir::OperandIndexSequence;
-
-TEST(ir_Operation_setIO, operation_setIO_conv)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- // Add Conv
- using Graph = onert::ir::operation::Conv2D;
-
- auto input_operand = graph.addOperand(shape, type);
- auto kernel_operand = graph.addOperand(shape, type);
- auto bias_operand = graph.addOperand(shape, type);
- IndexSet inputs{input_operand, kernel_operand, bias_operand};
-
- Graph::Param conv_params;
- conv_params.padding.type = onert::ir::PaddingType::SAME;
- conv_params.stride.horizontal = 1;
- conv_params.stride.vertical = 1;
- conv_params.activation = onert::ir::Activation::NONE;
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
-
- ASSERT_NE(conv, nullptr);
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- conv->setInputs({8, 9, 10});
- ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
-}
-
-TEST(ir_Operation_setIO, neg_operation_setIO_concat)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
-
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- using Graph = onert::ir::operation::Concat;
-
- // Add Concat
- IndexSet inputs;
- for (int i = 0; i < 6; ++i)
- {
- inputs.append(graph.addOperand(shape, type));
- }
-
- Graph::Param concat_params{0};
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
-
- ASSERT_NE(concat, nullptr);
- ASSERT_EQ(concat->getInputs().size(), 6);
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
-
- concat->setInputs({80, 6, 9, 11});
- ASSERT_EQ(concat->getInputs().size(), 4);
- ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
- ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
- ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operation.h"
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
-#include "ir/Operand.h"
-#include "../MockNode.h"
-
-using IndexSet = onert::ir::OperandIndexSequence;
-using Mock = onert_test::ir::SimpleMock;
-
-TEST(Verifier, dag_checker)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
-
- graph.finishBuilding();
-
- onert::ir::verifier::DAGChecker verifier;
-
- ASSERT_TRUE(verifier.verify(graph));
-}
-
-TEST(Verifier, neg_edge_consistency_checker_1)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
- auto op_ind = graph.addOperation(std::move(mock_op));
-
- graph.finishBuilding();
-
- graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
-
- onert::ir::verifier::EdgeConsistencyChecker verifier;
- ASSERT_FALSE(verifier.verify(graph));
-}
-
-TEST(Verifier, neg_edge_consistency_checker_2)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
- auto mock_op_ptr = mock_op.get();
- auto op_ind = graph.addOperation(std::move(mock_op));
-
- graph.finishBuilding();
-
- mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
-
- onert::ir::verifier::EdgeConsistencyChecker verifier;
- ASSERT_FALSE(verifier.verify(graph));
-}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <ir/Shape.h>
-
-#include <gtest/gtest.h>
-
-TEST(ShapeTest, basic_test)
-{
- {
- onert::ir::Shape shape(3);
-
- shape.dim(0) = 1;
- shape.dim(1) = 2;
- shape.dim(2) = 3;
-
- ASSERT_EQ(shape.rank(), 3);
- ASSERT_EQ(shape.num_elements(), 6);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
- ASSERT_EQ(shape.hasUnspecifiedDims(), false);
- }
- {
- onert::ir::Shape shape; // scalar or rank is unspecified
-
- ASSERT_EQ(shape.rank(), 0);
- ASSERT_EQ(shape.num_elements(), 1);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
- ASSERT_EQ(shape.hasUnspecifiedDims(), false);
- }
-}
-
-TEST(ShapeTest, neg_basic_test)
-{
- {
- onert::ir::Shape shape(2);
-
- shape.dim(0) = 1;
- shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
-
- ASSERT_EQ(shape.rank(), 2);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
- ASSERT_EQ(shape.hasUnspecifiedDims(), true);
- EXPECT_ANY_THROW(shape.num_elements());
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/ObjectManager.h"
-#include "util/Index.h"
-
-using namespace onert;
-
-struct TestTag;
-using Index = typename util::Index<uint32_t, TestTag>;
-
-TEST(ObjectManager, emplace)
-{
- util::ObjectManager<Index, int> man;
-
- auto index = man.emplace(100);
- ASSERT_EQ(man.at(index), 100);
-}
-
-TEST(ObjectManager, neg_remove_1)
-{
- util::ObjectManager<Index, int> man;
-
- Index index = man.emplace(100);
- ASSERT_TRUE(man.exist(index));
- ASSERT_EQ(man.at(index), 100);
-
- man.remove(index);
- ASSERT_FALSE(man.exist(index));
-}
-
-TEST(ObjectManager, neg_remove_2)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- ASSERT_TRUE(man.exist(index0));
- ASSERT_EQ(man.at(index0), 100);
- ASSERT_TRUE(man.exist(index1));
- ASSERT_EQ(man.at(index1), 200);
-
- man.remove(index0);
- ASSERT_FALSE(man.exist(index0));
- ASSERT_TRUE(man.exist(index1));
- ASSERT_EQ(man.at(index1), 200);
-}
-
-TEST(ObjectManager, push)
-{
- util::ObjectManager<Index, int> man;
-
- auto index = man.push(std::unique_ptr<int>{new int{100}});
- ASSERT_EQ(man.at(index), 100);
-}
-
-TEST(ObjectManager, const_iterate)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- auto index2 = man.emplace(300);
-
- int sum = 0;
- man.iterate([&](const Index &index, const int &val) { sum += val; });
- ASSERT_EQ(sum, 600);
-}
-
-TEST(ObjectManager, non_const_iterate)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- auto index2 = man.emplace(300);
-
- man.iterate([&](const Index &index, int &val) { val += 1; });
- ASSERT_EQ(man.at(index0), 101);
- ASSERT_EQ(man.at(index1), 201);
- ASSERT_EQ(man.at(index2), 301);
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Layout.h"
-#include "util/ShapeInference.h"
-
-using namespace onert::ir;
-
-TEST(ShapeInference, Elementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{3};
- auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.dim(0), 1);
- ASSERT_EQ(infered_out_shape.dim(1), 299);
- ASSERT_EQ(infered_out_shape.dim(2), 299);
- ASSERT_EQ(infered_out_shape.dim(3), 3);
-}
-
-TEST(ShapeInference, neg_Elementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{5, 3};
- ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
-}
-
-TEST(ShapeInference, Pool2DNodeSame)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::SAME};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeValid)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::VALID};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeExplicit)
-{
- Shape in_shape{10, 3, 5, 20};
-
- Stride stride{3, 7};
- Padding padding{4, 3, 2, 1};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{0, 7};
- Padding padding{PaddingType::SAME};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, Conv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{30, 3, 6, 20};
-
- operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
- Dilation{1, 1}};
- auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
- Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param =
- operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-}
-
-TEST(ShapeInference, neg_Conv2D_InvalidStride)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{30, 3, 6, 20};
-
- operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
- Dilation{1, 1}};
- ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, DepthwiseConv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{1, 3, 6, 60};
-
- operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
- Activation::NONE, Dilation{1, 1}};
- auto infered_out_shape =
- onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
- Activation::NONE, Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE,
- Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-}
-
-TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{1, 3, 6, 60};
-
- operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
- Activation::NONE, Dilation{1, 1}};
- ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, Concat)
-{
- {
- Shape in1{10, 20, 30, 3, 50};
- Shape in2{10, 20, 30, 2, 50};
- Shape in3{10, 20, 30, 2, 50};
-
- operation::Concat::Param param{3};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 5);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 30);
- ASSERT_EQ(infered_out_shape.dim(3), 7);
- ASSERT_EQ(infered_out_shape.dim(4), 50);
- }
- {
- // case 1. when axis < 0
- Shape in1{10, 20, 2};
- Shape in2{10, 20, 3};
-
- operation::Concat::Param param{-1};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 3);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 5);
- }
- {
- // case 2. when axis < 0
- Shape in1{2, 20, 2};
- Shape in2{3, 20, 2};
-
- operation::Concat::Param param{-3};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 3);
- ASSERT_EQ(infered_out_shape.dim(0), 5);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 2);
- }
-}
-
-TEST(ShapeInference, neg_Concat)
-{
- {
- operation::Concat::Param param{2};
- Shape in1{10, 1, 3};
- Shape in2{10, 2, 4}; // dim[1] should be 1 but 2
-
- EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
- }
- { // wrong rank
- operation::Concat::Param param{2};
- Shape in1{10, 2, 3, 4};
- Shape in2{10, 2, 4}; // rank should be 4
-
- EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
- }
-}
-
-TEST(ShapeInference, ExpandDims)
-{
- Shape in_shape{30, 40};
-
- auto check = [&](int32_t axis, Shape &expected) {
- auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
-
- ASSERT_EQ(actual.rank(), 3);
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- { // boundary
- int32_t axis = 0;
- Shape expected{1, 30, 40};
- check(axis, expected);
- }
- { // boundary
- int32_t axis = 2;
- Shape expected{30, 40, 1};
- check(axis, expected);
- }
- { // inside
- int32_t axis = 1;
- Shape expected{30, 1, 40};
- check(axis, expected);
- }
- { // negative boundary
- int32_t axis = -1;
- Shape expected{30, 40, 1};
- check(axis, expected);
- }
- { // negative boundary
- int32_t axis = -3;
- Shape expected{1, 30, 40};
- check(axis, expected);
- }
-}
-
-TEST(ShapeInference, neg_ExpandDims)
-{
- Shape in_shape{30, 40};
-
- { // over boundary
- int32_t axis = 3;
- ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
- }
- { // over boundary
- int32_t axis = -4;
- ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
- }
-}
-
-TEST(ShapeInference, FullyConnected)
-{
- Shape in_shape{3, 4, 5, 6};
- Shape ker_shape{3, 10};
- auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
-
- ASSERT_EQ(infered_out_shape.rank(), 2);
- ASSERT_EQ(infered_out_shape.dim(0), 36);
- ASSERT_EQ(infered_out_shape.dim(1), 3);
-}
-
-TEST(ShapeInference, Transpose)
-{
- auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
- // pre-conditions
- ASSERT_EQ(in_shape.rank(), perm.size());
- ASSERT_EQ(expected.rank(), perm.size());
- auto inferred_out_shape =
- onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
- // post-conditions
- ASSERT_EQ(inferred_out_shape.rank(), perm.size());
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- {
- ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
- }
- };
- // check for 2-D
- {
- Shape in_shape{2, 3};
- std::vector<int> perm = {1, 0};
- Shape expected{3, 2};
- // int32_t rank = 2;
- check(in_shape, perm, expected);
- }
- // check for 3-D
- {
- Shape in_shape{1, 2, 3};
- std::vector<int> perm = {2, 0, 1};
- Shape expected{3, 1, 2};
- // int32_t rank = 3;
- check(in_shape, perm, expected);
- }
- // check for 4-D
- {
- Shape in_shape{1, 2, 3, 4};
- std::vector<int> perm = {1, 3, 0, 2};
- Shape expected{2, 4, 1, 3};
- // int32_t rank = 4;
- check(in_shape, perm, expected);
- }
-}
-
-TEST(ShapeInference, neg_Transpose)
-{
- Shape in_shape{1, 2, 3};
- // Invalid parameter size
- {
- std::vector<int> perm = {2, 0, 1, 0};
- // int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
- std::runtime_error);
- }
- // Invalid parameter value
- {
- std::vector<int> perm = {2, 0, 3};
- // int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
- std::runtime_error);
- }
-}
-
-TEST(ShapeInference, Gather)
-{
- auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
- int rank = input.rank();
- auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
-
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- // check for 2-D, 3-D, axis 0
- {
- Shape input{3, 4};
- Shape indices{1, 1, 2};
- int32_t axis = 0;
- Shape expected{1, 1, 2, 4};
- check(input, indices, expected, axis);
- }
-
- // check for 2-D, 3-D, axis 1
- {
- Shape input{3, 4};
- Shape indices{1, 2, 1};
- int32_t axis = 1;
- Shape expected{3, 1, 2, 1};
- check(input, indices, expected, axis);
- }
-
- // check for 3-D, 2-D, axis 0
- {
- Shape input{2, 3, 4};
- Shape indices{1, 2};
- int32_t axis = 0;
- Shape expected{1, 2, 3, 4};
- check(input, indices, expected, axis);
- }
-
- // check for 3-D, 2-D, axis 2
- {
- Shape input{2, 3, 4};
- Shape indices{2, 1};
- int32_t axis = 2;
- Shape expected{2, 3, 2, 1};
- check(input, indices, expected, axis);
- }
-
- // check for 4D, axis 0
- {
- Shape input{1, 2, 3, 4};
- Shape indices{2};
- int32_t axis = 0;
- Shape expected{2, 2, 3, 4};
- check(input, indices, expected, axis);
- }
-}
-
-TEST(ShapeInference, BCQFullyConnected)
-{
- auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
- Shape &expected) {
- auto actual =
- onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- {
- Shape in_shape{10, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
-
- Shape expected{30, 1};
- check(in_shape, cluster_shape, cluster, expected);
- }
-
- {
- Shape in_shape{1, 1};
- Shape cluster_shape{1, 2};
- std::vector<int> cluster = {3, 50};
-
- Shape expected{50, 1};
- check(in_shape, cluster_shape, cluster, expected);
- }
-}
-
-TEST(ShapeInference, BCQGather)
-{
- auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
- uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
- operation::BCQGather::Param param{hidden_size, axis};
- auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
- cluster.data(), rank, param);
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- {
- Shape indices_shape{5, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
- uint32_t hidden_size = 10;
- uint32_t axis = 0;
- int rank = 2;
-
- Shape expected{5, 1, 10};
- check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
- }
-
- {
- Shape indices_shape{5, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
- uint32_t hidden_size = 10;
- uint32_t axis = 1;
- int rank = 2;
-
- Shape expected{30, 5, 1};
- check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
- }
-}
+++ /dev/null
-../.clang-format.8
\ No newline at end of file
GeneratedTests.fill_ex_4D_float
GeneratedTests.fill_ex_dynamic_nnfw
GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
GeneratedTests.fully_connected_hybrid_1_nnfw
GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
GeneratedTests.fill_ex_4D_float
GeneratedTests.fill_ex_dynamic_nnfw
GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
GeneratedTests.fully_connected_hybrid_1_nnfw
GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
GeneratedTests.fill_ex_4D_float
GeneratedTests.fill_ex_dynamic_nnfw
GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
GeneratedTests.gather_dynamic_nnfw
target_link_libraries(${RUNTIME_NNFW_API_TEST} circle_schema)
install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest_standalone)
+
+# Install nnpackage test model (add)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/add)
+set(NNPACKAGE_INSTALL_TARGET unittest_standalone/nnfw_api_gtest_models)
+
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add)
+
+# Install nnpackage test model (add_no_manifest)
+set(NNPACKAGE_MODEL ${NNPACKAGE_MODEL_DIR}/add.tflite)
+install(FILES ${NNPACKAGE_MODEL} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add_no_manifest/add_no_manifest)
+
+# Install nnpackage test model (add_invalid_manifest)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/add_invalid_manifest)
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add_invalid_manifest)
+
+# Install nnpackage test model (if_dynamic)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/if_dynamic)
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/if_dynamic)
+
+# Install nnpackage test model (while_dynamic)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/while_dynamic)
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/while_dynamic)
return ind;
}
+uint32_t CircleGen::addTensor(const TensorParams ¶ms, std::vector<float> &scale,
+ std::vector<int64_t> &zero_point)
+{
+ uint32_t ind = curSubgCtx().tensors.size();
+ curSubgCtx().tensors.emplace_back(buildTensor(params, scale, zero_point));
+ return ind;
+}
+
uint32_t CircleGen::addTensor(const TensorParams ¶ms, const SparsityParams &sp)
{
uint32_t ind = curSubgCtx().tensors.size();
circle::BuiltinOptions_ReducerOptions, options);
}
+uint32_t CircleGen::addOperatorMul(const OperatorParams ¶ms,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateMulOptions(_fbb, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_MUL,
+ circle::BuiltinOptions_MulOptions, options);
+}
+
uint32_t CircleGen::addOperatorNeg(const OperatorParams ¶ms)
{
auto options = circle::CreatePadOptions(_fbb).Union();
circle::BuiltinOptions_PadV2Options, options);
}
+uint32_t CircleGen::addOperatorQuantize(const OperatorParams ¶ms)
+{
+ auto options = circle::CreateQuantizeOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_QUANTIZE,
+ circle::BuiltinOptions_QuantizeOptions, options);
+}
+
uint32_t CircleGen::addOperatorRank(const OperatorParams ¶ms)
{
auto options = circle::CreateRankOptions(_fbb).Union();
circle::BuiltinOptions_SelectV2Options, options);
}
+uint32_t CircleGen::addOperatorSlice(const OperatorParams ¶ms)
+{
+ auto options = circle::CreateSliceOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SLICE,
+ circle::BuiltinOptions_SliceOptions, options);
+}
+
+uint32_t CircleGen::addOperatorSoftmax(const OperatorParams ¶ms, float beta)
+{
+ auto options = circle::CreateSoftmaxOptions(_fbb, beta).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SOFTMAX,
+ circle::BuiltinOptions_SoftmaxOptions, options);
+}
+
uint32_t CircleGen::addOperatorSplit(const OperatorParams ¶ms, int32_t num_split)
{
auto options = circle::CreateSplitOptions(_fbb, num_split).Union();
return addOperatorWithOptions(params, circle::BuiltinOperator_SPLIT,
circle::BuiltinOptions_SplitOptions, options);
}
+
uint32_t CircleGen::addOperatorStridedSlice(const OperatorParams ¶ms, int32_t begin_mask,
int32_t end_mask, int32_t ellipsis_mask,
int32_t new_axis_mask, int32_t shrink_axis_mask)
return addOperatorWithOptions(params, circle::BuiltinOperator_STRIDED_SLICE,
circle::BuiltinOptions_StridedSliceOptions, options);
}
+
+uint32_t CircleGen::addOperatorSub(const OperatorParams ¶ms,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateSubOptions(_fbb, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SUB,
+ circle::BuiltinOptions_SubOptions, options);
+}
+
uint32_t CircleGen::addOperatorTile(const OperatorParams ¶ms)
{
auto options = circle::CreateTileOptions(_fbb).Union();
false /* is_variable */, 0 /* sparsity */, 0 /* shape_signature */);
}
+flatbuffers::Offset<circle::Tensor> CircleGen::buildTensor(const TensorParams ¶ms,
+ std::vector<float> &scales,
+ std::vector<int64_t> &zero_points)
+{
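+  // Quantization parameters given as vectors, typically one scale/zero-point pair per
+  // channel (per-channel quantization).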
+ auto shape = _fbb.CreateVector(params.shape);
+ auto name = _fbb.CreateString(params.name);
+ auto quantization =
+ circle::CreateQuantizationParametersDirect(_fbb, nullptr, nullptr, &scales, &zero_points);
+ return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name, quantization,
+ false /* is_variable */, 0 /* sparsity */, 0 /* shape_signature */);
+}
+
flatbuffers::Offset<circle::SparsityParameters>
CircleGen::buildSparsityParameters(const SparsityParams &sp)
{
uint32_t addBuffer(const uint8_t *buf, size_t size);
uint32_t addTensor(const TensorParams ¶ms);
uint32_t addTensor(const TensorParams ¶ms, float scale, int64_t zero_point);
+ uint32_t addTensor(const TensorParams ¶ms, std::vector<float> &scale,
+ std::vector<int64_t> &zero_point);
uint32_t addTensor(const TensorParams ¶ms, const SparsityParams &sp);
void setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs);
uint32_t nextSubgraph();
uint32_t addOperatorLeakyRelu(const OperatorParams ¶ms, float alpha);
uint32_t addOperatorLess(const OperatorParams ¶ms);
uint32_t addOperatorLogSoftmax(const OperatorParams ¶ms);
+ uint32_t addOperatorMul(const OperatorParams ¶ms, circle::ActivationFunctionType actfn);
uint32_t addOperatorMean(const OperatorParams ¶ms, bool keep_dims);
uint32_t addOperatorNeg(const OperatorParams ¶ms);
uint32_t addOperatorOneHot(const OperatorParams ¶ms, int32_t axis);
uint32_t addOperatorPad(const OperatorParams ¶ms);
uint32_t addOperatorPadV2(const OperatorParams ¶ms);
+ uint32_t addOperatorQuantize(const OperatorParams ¶ms);
uint32_t addOperatorRank(const OperatorParams ¶ms);
uint32_t addOperatorReduce(const OperatorParams ¶ms, circle::BuiltinOperator reduce_op,
bool keep_dims);
circle::TensorType type = circle::TensorType::TensorType_INT32);
uint32_t addOperatorSelect(const OperatorParams ¶ms);
uint32_t addOperatorSelectV2(const OperatorParams ¶ms);
+ uint32_t addOperatorSlice(const OperatorParams ¶ms);
+ uint32_t addOperatorSoftmax(const OperatorParams ¶ms, float beta);
uint32_t addOperatorSplit(const OperatorParams ¶ms, int32_t num_split);
uint32_t addOperatorSqrt(const OperatorParams ¶ms);
uint32_t addOperatorSquare(const OperatorParams ¶ms);
uint32_t addOperatorStridedSlice(const OperatorParams ¶ms, int32_t begin_mask = 0,
int32_t end_mask = 0, int32_t ellipsis_mask = 0,
int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0);
+ uint32_t addOperatorSub(const OperatorParams ¶ms, circle::ActivationFunctionType actfn);
uint32_t addOperatorTile(const OperatorParams ¶ms);
uint32_t addOperatorTranspose(const OperatorParams ¶ms);
uint32_t addOperatorWhile(const OperatorParams ¶ms, uint32_t cond_subg, uint32_t body_subg);
flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams ¶ms);
flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams ¶ms, float scale,
int64_t zero_point);
+ flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams ¶ms,
+ std::vector<float> &scales,
+ std::vector<int64_t> &zero_points);
flatbuffers::Offset<circle::SparsityParameters> buildSparsityParameters(const SparsityParams &sp);
flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams ¶ms,
const SparsityParams &sp);
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "0"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "1"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "2"));
- NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "OP_SEQ_MAX_NODE", "0"));
- NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "OP_SEQ_MAX_NODE", "1"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "EXECUTOR", "Linear"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "OP_BACKEND_ALLOPS", "cpu"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "USE_SCHEDULER", "0"));
*/
#include "fixtures.h"
+#include "one_op_tests/WhileTestModel.h"
TEST_F(ValidationTestTwoSessions, neg_two_sessions_create)
{
CircleBuffer cbuf;
};
-TEST_F(ValidationTestTwoSessionsCreated, two_sessions_run_simple_model)
+TEST_F(ValidationTestTwoSessionsCreated, two_sessions_run_simple_AveragePool_model)
{
constexpr int N = 64, H = 64, W = 64, C = 3;
AveragePoolModel model(N, H, W, C);
SUCCEED();
}
+TEST_F(ValidationTestTwoSessionsCreated, neg_two_sessions_model_load)
+{
+ constexpr int N = 64, H = 64, W = 64, C = 3;
+ AveragePoolModel model(N, H, W, C);
+
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session1, model.cbuf.buffer(), model.cbuf.size()));
+ ASSERT_EQ(nnfw_load_circle_from_buffer(nullptr, model.cbuf.buffer(), model.cbuf.size()),
+ NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestTwoSessionsCreated, two_sessions_run_simple_While_model)
+{
+ WhileModelLoop10 model;
+
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session1, model.cbuf.buffer(), model.cbuf.size()));
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session2, model.cbuf.buffer(), model.cbuf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session1, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session2, "cpu"));
+
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session2));
+
+ std::vector<float> in_buf1(model.inputCount()); // any value
+ std::vector<float> out_buf1(model.outputputCount());
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf1.data(),
+ in_buf1.size() * model.sizeOfDType()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf1.data(),
+ out_buf1.size() * model.sizeOfDType()));
+
+ std::vector<float> in_buf2(model.inputCount()); // any value
+ std::vector<float> out_buf2(model.outputputCount());
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf2.data(),
+ in_buf2.size() * model.sizeOfDType()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf2.data(),
+ out_buf2.size() * model.sizeOfDType()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session2));
+
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session2));
+
+ SUCCEED();
+}
+
// TODO Write two-session-test with large models run by threads
ValidationTestSessionCreated::SetUp();
if (PackageNo == NNPackages::ADD)
{
- auto cbuf = genAddModel();
- NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_session, cbuf.buffer(), cbuf.size()));
+ // NOTE the circle buffer must be kept until finishing the test, so keep it as a member
+ _cbuf = genAddModel();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_session, _cbuf.buffer(), _cbuf.size()));
}
else
{
}
void TearDown() override { ValidationTestSessionCreated::TearDown(); }
+
+private:
+ CircleBuffer _cbuf; // Used only for models from buffer, unused for models from files
};
template <int PackageNo>
auto cbuf = genAddModel();
NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(obj.session, cbuf.buffer(), cbuf.size()));
ASSERT_EQ(nnfw_prepare(obj.session), NNFW_STATUS_NO_ERROR);
+ _cbufs.push_back(std::move(cbuf)); // Keep the buffer so it can outlive the session
uint32_t num_inputs;
ASSERT_EQ(nnfw_input_size(obj.session, &num_inputs), NNFW_STATUS_NO_ERROR);
protected:
std::array<SessionObject, NUM_SESSIONS> _objects;
+ std::vector<CircleBuffer> _cbufs;
};
class ValidationTestTwoSessions : public ValidationTest
SUCCEED();
}
+TEST_F(GenModelTest, OneOp_Add_VarToVarInt8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
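+  // Expected outputs follow the affine quantization rule: dequantize each input as
+  // (q - zero_point) * scale, add, then requantize as q = real / 0.5 + (-6).
+  // For example, the first element: (1 - 2) * 1.0 + (5 - 3) * 2.0 = 3.0 -> 3.0 / 0.5 - 6 = 0.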
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BroadcastAdd_VarToVarInt8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
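+  // Broadcasting: the single rhs element, real value (5 - 3) * 2.0 = 4,
+  // is added to every lhs element.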
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{0, 4, 2, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
TEST_F(GenModelTest, OneOp_Add_VarToVarSame)
{
CircleGen cgen;
SUCCEED();
}
+TEST_F(GenModelTest, neg_OneOp_Add_DifferentQuant8Type)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.2, -3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_INT8});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
{
CircleGen cgen;
SUCCEED();
}
+
+TEST_F(GenModelTest, neg_OneOp_Add_VarToVarInt16)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 1., 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
#include "GenModelTest.h"
-TEST_F(GenModelTest, OneOp_AvgPool2D)
+struct AvgPool2DParam
{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{2.5}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
+ TestCaseData tcd;
+ std::vector<int32_t> input_shape;
+ std::vector<int32_t> output_shape;
+ struct filter_stride
+ {
+ int32_t filter_w;
+ int32_t filter_h;
+ int32_t stride_w;
+ int32_t stride_h;
+ } param = {1, 1, 1, 1};
+ struct data_type
+ {
+ circle::TensorType data_type;
+ float scale;
+ int64_t zero_point;
+ } type = {circle::TensorType::TensorType_FLOAT32, 0.0f, 0};
+ std::vector<std::string> backend = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class AveragePool2DVariation : public GenModelTest,
+ public ::testing::WithParamInterface<AvgPool2DParam>
+{
+};
-TEST_F(GenModelTest, OneOp_AvgPool2D_Large)
+TEST_P(AveragePool2DVariation, Test)
{
+ auto ¶m = GetParam();
CircleGen cgen;
- int in = cgen.addTensor({{1, 16, 32, 2}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 16, 16, 16, 16,
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, param.param.filter_w, param.param.filter_h,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({std::vector<float>(1024, 99)}, {{99, 99, 99, 99}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backend);
SUCCEED();
}
+// Test with different input types and values
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, AveragePool2DVariation,
+ ::testing::Values(
+ // float data
+ AvgPool2DParam{
+ uniformTCD<float>({{1, 3, 2, 4}}, {{2.5}}), {1, 2, 2, 1}, {1, 1, 1, 1}, {2, 2, 2, 2}},
+ // float data - large
+ AvgPool2DParam{uniformTCD<float>({std::vector<float>(18 * 36 * 2, 99)}, {{99, 99, 99, 99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18}},
+ // uint8_t data
+ AvgPool2DParam{uniformTCD<uint8_t>({{2, 6, 4, 8}}, {{5}}),
+ {1, 2, 2, 1},
+ {1, 1, 1, 1},
+ {2, 2, 2, 2},
+ {circle::TensorType::TensorType_UINT8, 1.2, 3}},
+ // uint8_t data -large
+ AvgPool2DParam{
+ uniformTCD<uint8_t>({{std::vector<uint8_t>(18 * 36 * 2, 99)}}, {{99, 99, 99, 99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18},
+ {circle::TensorType::TensorType_UINT8, 1.2, 3}},
+ // int8_t data
+ // TODO enable acl-cl, acl-neon backend
+ AvgPool2DParam{uniformTCD<int8_t>({{2, -6, 4, -8}}, {{-2}}),
+ {1, 2, 2, 1},
+ {1, 1, 1, 1},
+ {2, 2, 2, 2},
+ {circle::TensorType::TensorType_INT8, 2.0, -1},
+ {"cpu"}},
+ // int8_t data - large
+ // TODO enable acl-cl, acl-neon backend
+ AvgPool2DParam{
+ uniformTCD<int8_t>({{std::vector<int8_t>(18 * 36 * 2, -99)}}, {{-99, -99, -99, -99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18},
+ {circle::TensorType::TensorType_INT8, 2.0, -1},
+ {"cpu"}}));
+
TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput)
{
// 3D Tensors are not supported
SUCCEED();
}
+TEST_F(GenModelTest, OneOp_Conv2D_I8)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ int weight =
+ cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 38, 61}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_I8_PerChannel)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
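+  // Per-channel weight quantization: one scale/zero-point pair per output channel
+  // (the first dimension of the weight shape).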
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 0};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 30, 60}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
TEST_F(GenModelTest, neg_OneOp_Conv2D_Type)
{
CircleGen cgen;
SUCCEED();
}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoint)
+{
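+  // int8 weights are expected to be symmetrically quantized (zero point 0),
+  // so the non-zero weight zero point below should be rejected at model load.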
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ int weight =
+ cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 17);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoints)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 10};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
return cgen.finish();
}
-CircleBuffer genSimpleDepthwiseConv2DQuantizedModel(int stride, int input_depth,
- int depth_multiplier)
+template <typename T> struct DepthwiseConv2DQuantTestParam
+{
+ int stride = 1; // Used for both height and width
+ int input_depth = 1;
+ int depth_multiplier = 1;
+ std::vector<T> ref_output;
+};
+
+template <typename T>
+class DepthwiseConv2DQuantTest
+ : public GenModelTest,
+ public ::testing::WithParamInterface<DepthwiseConv2DQuantTestParam<T>>
+{
+};
+
+using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam<uint8_t>;
+using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest<uint8_t>;
+
+CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
{
assert(1 <= stride && stride <= 2);
assert(1 <= input_depth && input_depth <= 16);
return cgen.finish();
}
-struct DepthwiseConv2DVariationParam
-{
- int stride = 1; // Used for both height and width
- int input_depth = 1;
- int depth_multiplier = 1;
- std::vector<uint8_t> ref_output;
-};
-
-class DepthwiseConv2DVariation : public GenModelTest,
- public ::testing::WithParamInterface<DepthwiseConv2DVariationParam>
-{
-};
-
-TEST_P(DepthwiseConv2DVariation, Test)
+TEST_P(DepthwiseConv2DQuantTestU8, Test)
{
// Same input is used for all tests but output differs
static const std::vector<uint8_t> input64{
2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
auto ¶m = GetParam();
- _context = std::make_unique<GenModelTestContext>(genSimpleDepthwiseConv2DQuantizedModel(
- param.stride, param.input_depth, param.depth_multiplier));
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
_context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
// kernels.
INSTANTIATE_TEST_CASE_P(
- GenModelTest, DepthwiseConv2DVariation,
+ GenModelTest, DepthwiseConv2DQuantTestU8,
::testing::Values(
// Stride == 1
- DepthwiseConv2DVariationParam{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DVariationParam{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DQuantTestParamU8{
1, 2, 8, std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
- DepthwiseConv2DVariationParam{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
- DepthwiseConv2DVariationParam{1, 2, 1, std::vector<uint8_t>{2, 5}},
- DepthwiseConv2DVariationParam{1, 1, 2, std::vector<uint8_t>{2, 4}},
- DepthwiseConv2DVariationParam{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DQuantTestParamU8{1, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamU8{1, 1, 2, std::vector<uint8_t>{2, 4}},
+ DepthwiseConv2DQuantTestParamU8{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamU8{
1, 4, 4, std::vector<uint8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
- DepthwiseConv2DVariationParam{1, 12, 1,
- std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ DepthwiseConv2DQuantTestParamU8{1, 12, 1,
+ std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
// Stride == 2
- DepthwiseConv2DVariationParam{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
- DepthwiseConv2DVariationParam{2, 2, 1, std::vector<uint8_t>{2, 5}},
- DepthwiseConv2DVariationParam{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
- 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
- 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamU8{2, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{
2, 1, 20, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{
2, 1, 16, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{
2, 8, 2, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DVariationParam{
+ DepthwiseConv2DQuantTestParamU8{
2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
+using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
+
+CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
+{
+ assert(1 <= stride && stride <= 2);
+ assert(1 <= input_depth && input_depth <= 16);
+ assert(1 <= depth_multiplier && depth_multiplier <= 32);
+
+ const int output_depth = input_depth * depth_multiplier;
+ assert(1 <= output_depth && output_depth <= 32);
+
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
+ int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
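+  // Bias scale is input_scale * kernel_scale (0.5 * 0.5 = 0.25),
+  // as is conventional for quantized convolution bias.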
+ int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
+ stride, depth_multiplier, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_P(DepthwiseConv2DQuantTestI8, Test)
+{
+ // Same input is used for all tests but output differs
+ static const std::vector<int8_t> input64{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+ 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
+
+ auto ¶m = GetParam();
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
+ std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+ _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
+// kernels.
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, DepthwiseConv2DQuantTestI8,
+ ::testing::Values(
+ // Stride == 1
+ DepthwiseConv2DQuantTestParamI8{1, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{1, 4, 2, std::vector<int8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DQuantTestParamI8{
+ 1, 2, 8, std::vector<int8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
+ DepthwiseConv2DQuantTestParamI8{1, 2, 2, std::vector<int8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DQuantTestParamI8{1, 2, 1, std::vector<int8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamI8{1, 1, 2, std::vector<int8_t>{2, 4}},
+ DepthwiseConv2DQuantTestParamI8{1, 1, 4, std::vector<int8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{1, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamI8{
+ 1, 4, 4, std::vector<int8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
+ DepthwiseConv2DQuantTestParamI8{1, 12, 1,
+ std::vector<int8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ // Stride == 2
+ DepthwiseConv2DQuantTestParamI8{2, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamI8{2, 2, 1, std::vector<int8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 1, 8, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 1, 32, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 1, 20, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 1, 16, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 8, 2, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 16, 1, std::vector<int8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+
TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType)
{
_context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel(
}
// TODO add other invalid operation tests like above
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_NonZero_ZeroPoints)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1};
+ std::vector<int64_t> weight_zeropoints = {0, 10};
+ int weight = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_INT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Mul_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
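+  // Expected outputs: dequantize as (q - zero_point) * scale, multiply, then requantize as
+  // q = real / 0.5 + 2. For example, the second element:
+  // (12 - 3) * 1.0 * (4 - 1) * 2.0 = 54 -> 54 / 0.5 + 2 = 110.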
+ _context->addTestCase(uniformTCD<uint8_t>({{3, 12, 5, 2}, {5, 4, 7, 0}}, {{2, 110, 50, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Mul_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{-14, -34, -6, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_MulBroadcast_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{3, 12, 5, 4}, {5}}, {{2, 146, 34, 18}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_MulBroadcast_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{-14, 2, -6, 10}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_ThreeOperands)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
#include "GenModelTest.h"
-TEST_F(GenModelTest, OneOp_Pad)
+// Input shape: {1, 2, 2, 1}
+// Padding: {0, 0, 1, 1, 1, 1, 0, 0}
+// Output shape: {1, 4, 4, 1}
+struct PadParam
{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class PadVariation : public GenModelTest, public ::testing::WithParamInterface<PadParam>
+{
+};
+
+TEST_P(PadVariation, Test)
+{
+ auto ¶m = GetParam();
+
CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
uint32_t padding_buf = cgen.addBuffer(padding_data);
int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
cgen.addOperatorPad({{in, padding}, {out}});
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}}));
+ _context->addTestCase(param.tcd);
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
}
+// Test with different value types
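+// For quantized types the padded region is filled with the zero point, i.e. the quantized
+// representation of real value 0 (8 for the uint8 case, -5 for the int8 case).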
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, PadVariation,
+ ::testing::Values(
+ // float value
+ PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
+ // uint8 value
+ PadParam{
+ uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 8},
+ // int8 value
+ PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
+ {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
+ circle::TensorType::TensorType_INT8, 1.0, -5}));
+
TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank)
{
CircleGen cgen;
SUCCEED();
}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_QuantParam)
+{
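+  // Pad is expected to require matching quantization parameters on input and output;
+  // the differing zero points (1 vs 3) should make model load fail.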
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
SUCCEED();
}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<uint8_t> padding_value_data{3};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_UINT8, padding_value_buf}, 1.0, 1);
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_QuantParam)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 2);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<uint8_t> padding_value_data{3};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_UINT8, padding_value_buf}, 1.0, 1);
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+
+ cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleGen genSimpleQuantizeModel(circle::TensorType from_t, float input_scale, int input_zeropoint,
+ circle::TensorType to_t, float output_scale, int output_zeropoint)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 4, 4, 1}, from_t}, input_scale, input_zeropoint);
+ int out = cgen.addTensor({{1, 4, 4, 1}, to_t}, output_scale, output_zeropoint);
+ cgen.addOperatorQuantize({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen;
+}
+
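+// QUANTIZE re-quantizes values: q_out = round((q_in - in_zp) * in_scale / out_scale) + out_zp,
+// clamped to the output type range. For example, uint8 48 with (scale 1.0, zp 128) becomes
+// (48 - 128) * 1.0 / 2.0 + (-10) = -50 in int8 with (scale 2.0, zp -10).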
+TEST_F(GenModelTest, OneOp_Quantize_Uint8toInt8)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_UINT8, 1., 128, circle::TensorType_INT8, 2., -10);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<uint8_t>({127, 48, 151, 232, 56, 176, 47, 37, 51, 52, 39, 94, 15, 108, 142, 243})
+ .addOutput<int8_t>(
+ {-10, -50, 2, 42, -46, 14, -50, -55, -48, -48, -54, -27, -66, -20, -3, 48}));
+ _context->setBackends({"cpu"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Quantize_Int8toUint8)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_INT8, 2., -10, circle::TensorType_UINT8, 1., 128);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<int8_t>({-10, -50, 2, 42, -46, 14, -50, -55, -48, -48, -54, -27, -66, -20, -3, 48})
+ .addOutput<uint8_t>({128, 48, 152, 232, 56, 176, 48, 38, 52, 52, 40, 94, 16, 108, 142, 244}));
+ _context->setBackends({"cpu"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Quantize_Uint8toInt16)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_UINT8, 1., 128, circle::TensorType_INT16, 2., -10);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Quantize_Int8toInt16)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_INT8, 2., -10, circle::TensorType_INT16, 1., 128);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
#include <memory>
-TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToConst)
+struct ResizeBilinearParam
{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ResizeBilinearVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ResizeBilinearParam>
+{
+};
+
+TEST_P(ResizeBilinearVariation, Test)
+{
+  auto &param = GetParam();
+
CircleGen cgen;
std::vector<int32_t> size_data{3, 3};
uint32_t size_buf = cgen.addBuffer(size_data);
int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 3, 3, 1}, param.data_type}, param.scale, param.zero_point);
cgen.addOperatorResizeBilinear({{in, size}, {out}});
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- uniformTCD<float>({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}}));
+ _context->addTestCase(param.tcd);
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
}
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, ResizeBilinearVariation,
+ ::testing::Values(
+ // float value
+ ResizeBilinearParam{uniformTCD<float>({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667,
+ 1.666666667, 2, 2, 2}})},
+ // uint8 value
+ ResizeBilinearParam{uniformTCD<uint8_t>({{3, 6, 9, 12}}, {{3, 5, 6, 7, 9, 10, 9, 11, 12}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 0},
+ // int8 value
+ ResizeBilinearParam{uniformTCD<int8_t>({{-6, -3, 9, 12}}, {{-6, -4, -3, 4, 6, 7, 9, 11, 12}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
+
TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToVar)
{
CircleGen cgen;
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct SliceVariationParam
+{
+ std::vector<int32_t> input_shape;
+ std::vector<int32_t> begins;
+ std::vector<int32_t> sizes;
+ TestCaseData tcd;
+
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+ circle::TensorType begins_type = circle::TensorType::TensorType_INT32;
+};
+
+class SliceVariation : public GenModelTest,
+ public ::testing::WithParamInterface<SliceVariationParam>
+{
+};
+
+TEST_P(SliceVariation, Test)
+{
+  auto &param = GetParam();
+
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
+ if (param.begins_type == circle::TensorType::TensorType_INT32)
+ {
+ uint32_t begins_buf = cgen.addBuffer(param.begins);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(param.sizes);
+ int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ else if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
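+    // param.begins/sizes are given as int32; copy them into int64 buffers
+    // to exercise the INT64 begins/sizes path.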
+ std::vector<int64_t> begins_64(param.begins.size());
+ std::vector<int64_t> sizes_64(param.sizes.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ begins_64[i] = param.begins[i];
+ sizes_64[i] = param.sizes[i];
+ }
+
+ uint32_t begins_buf = cgen.addBuffer(begins_64);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(sizes_64);
+ int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+
+  // ACL backends don't support int64 yet
+ if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ _context->setBackends({"cpu"});
+ }
+ else
+ {
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
+ }
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, SliceVariation,
+ ::testing::Values(
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_UINT8,
+ 1,
+ 0},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_FLOAT32,
+ 0,
+ 0,
+ circle::TensorType::TensorType_INT64}));
+
+TEST_F(GenModelTest, neg_OneOp_Slice_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<float> begins_data = {0, 0, 1, 0};
+ uint32_t begins_buf = cgen.addBuffer(begins_data);
+ int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, begins_buf});
+ std::vector<float> sizes_data = {1, 2, 1, 1};
+ uint32_t sizes_buf = cgen.addBuffer(sizes_data);
+ int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, sizes_buf});
+ int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Slice_DiffType)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> begins_data = {0, 0, 1, 0};
+ uint32_t begins_buf = cgen.addBuffer(begins_data);
+ int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, begins_buf});
+ std::vector<int64_t> sizes_data = {1, 2, 1, 1};
+ uint32_t sizes_buf = cgen.addBuffer(sizes_data);
+ int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_INT64, sizes_buf});
+ int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// beta = 0.1
+// input/output shape: {1, 2, 1, 4}
+struct SoftmaxParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float input_scale = 0.0f;
+ int64_t input_zero_point = 0;
+};
+
+class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterface<SoftmaxParam>
+{
+};
+
+TEST_P(SoftmaxVariation, Test)
+{
+  auto &param = GetParam();
+
+ CircleGen cgen;
+
+  // The NNAPI spec and TFLite tests use a fixed output scale and zero point
+ float out_scale = 0.0;
+ int64_t out_zero_point = 0;
+ if (param.data_type == circle::TensorType::TensorType_UINT8)
+ {
+ out_scale = 1.0f / 256;
+ }
+ else if (param.data_type == circle::TensorType::TensorType_INT8)
+ {
+ out_scale = 1.0f / 256;
+ out_zero_point = -128;
+ }
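+  // Softmax outputs lie in [0, 1), so a scale of 1/256 covers the full range;
+  // for INT8 the zero point of -128 places 0.0 at the type minimum.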
+
+ int input =
+ cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
+ int out = cgen.addTensor({{1, 2, 1, 4}, param.data_type}, out_scale, out_zero_point);
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"cpu", "acl_neon", "acl_cl"});
+
+ SUCCEED();
+}
+
+// Test with different value types
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, SoftmaxVariation,
+ ::testing::Values(
+ // float value
+ SoftmaxParam{
+ uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
+ {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
+ // uint8 value
+ SoftmaxParam{
+ uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 10},
+ // int8 value
+ SoftmaxParam{
+ uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
+
+TEST_F(GenModelTest, neg_OneOp_Softmax_Type)
+{
+ CircleGen cgen;
+ int input = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
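+// Expected outputs of the quantized Sub tests below can be derived as
+//   out_q = (lhs_scale * (lhs_q - lhs_zp) - rhs_scale * (rhs_q - rhs_zp)) / out_scale + out_zp
+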
+TEST_F(GenModelTest, OneOp_Sub_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{13, 12, 25, 40}, {5, 4, 7, 0}}, {{6, 8, 22, 80}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Sub_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{-16, 24, 34, -6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_SubBroadcast_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{13, 12, 25, 40}, {5}}, {{6, 4, 30, 60}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_SubBroadcast_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{-16, -12, -14, -10}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_ThreeOperands)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
*/
#include "GenModelTest.h"
+#include "WhileTestModel.h"
#include <memory>
TEST_F(GenModelTest, OneOp_While)
{
- // The model looks just like the below pseudocode
- //
- // function model(x)
- // {
- // while (x < 100.0)
- // {
- // x = x + 10.0;
- // }
- // return x
- // }
-
- CircleGen cgen;
- std::vector<float> incr_data{10};
- uint32_t incr_buf = cgen.addBuffer(incr_data);
- std::vector<float> end_data{100};
- uint32_t end_buf = cgen.addBuffer(end_data);
-
- // primary subgraph
- {
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorWhile({{x_in}, {x_out}}, 1, 2);
- cgen.setInputsAndOutputs({x_in}, {x_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({x_in}, {x_out});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ WhileModelLoop10 model;
+ _context = std::make_unique<GenModelTestContext>(std::move(model.cbuf));
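+  // The body subgraph adds 10 until x reaches at least 100, so 0 -> 100 and 2 or 22 -> 102.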
_context->addTestCase(uniformTCD<float>({{0}}, {{100}}));
_context->addTestCase(uniformTCD<float>({{2}}, {{102}}));
_context->addTestCase(uniformTCD<float>({{22}}, {{102}}));
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_API_TEST_WHILE_TEST_MODEL_H__
+#define __NNFW_API_TEST_WHILE_TEST_MODEL_H__
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+class WhileModelLoop10
+{
+public:
+ WhileModelLoop10()
+ {
+ // The model looks just like the below pseudocode
+ //
+ // function model(x)
+ // {
+ // while (x < 100.0)
+ // {
+ // x = x + 10.0;
+ // }
+ // return x
+ // }
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+ std::vector<float> end_data{100};
+ uint32_t end_buf = cgen.addBuffer(end_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in}, {x_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+ cbuf = cgen.finish();
+ }
+
+ int inputCount() { return 1; }
+  int outputCount() { return 1; }
+ int sizeOfDType() { return sizeof(float); }
+
+ CircleBuffer cbuf;
+};
+
+#endif // __NNFW_API_TEST_WHILE_TEST_MODEL_H__
file(GLOB TFLITE_CONFIG_DIR models/tflite)
install(DIRECTORY ${TFLITE_CONFIG_DIR} DESTINATION test/models)
-# Install nnpackage test config
-file(GLOB NNPACKAGE_MODEL_CONFIG_DIR models/nnfw_api_gtest)
-install(DIRECTORY ${NNPACKAGE_MODEL_CONFIG_DIR} DESTINATION test/models)
-
# Install test list
file(GLOB TEST_LIST_DIR list)
install(DIRECTORY ${TEST_LIST_DIR} DESTINATION test)
$BRIDGE shell rm $TEST_ROOT/nnpkg.tar.gz
# 1. Run
-$BRIDGE shell LD_LIBRARY_PATH=$TEST_ROOT/Product/out/lib OP_SEQ_MAX_NODE=1 TRACE_FILEPATH=$TEST_ROOT/trace.json BACKENDS=$BACKENDS $TEST_ROOT/Product/out/bin/nnpackage_run --nnpackage $NNPKG_PATH_TARGET -r $NUM_RUNS
+$BRIDGE shell LD_LIBRARY_PATH=$TEST_ROOT/Product/out/lib TRACE_FILEPATH=$TEST_ROOT/trace.json BACKENDS=$BACKENDS $TEST_ROOT/Product/out/bin/nnpackage_run --nnpackage $NNPKG_PATH_TARGET -r $NUM_RUNS
# 2. Pull result file
echo "Pulling data from target to trace.json"
INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
MD5_CHECK="on"
-DOWNLOAD_MODEL="all"
function Usage()
{
echo ""
echo "Options:"
echo " --ignoremd5 Ignore MD5 check when download model files"
- echo " --model=(all|nnpackage|tflite) Download test model (default=all)"
+  echo "       --model=(all|nnpackage|tflite) Download test model (deprecated; all models are always downloaded)"
}
for i in "$@"
MD5_CHECK="off"
;;
--model=*)
- DOWNLOAD_MODEL=${i#*=}
+ # deprecated
;;
*)
echo "Unknown option: $i"
fi
echo "Download from $MODELFILE_SERVER"
-if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "tflite" ]]; then
- # Download tflite models
- $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK
-fi
-
-if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "nnpackage" ]]; then
- # Download nnpackage model
- NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnfw_api_gtest/
- NNPACKAGE_CACHE_DIR=$INSTALL_DIR/unittest_standalone/nnfw_api_gtest_models/
- $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK \
- --configdir=$NNPACKAGE_CONFIG_DIR --cachedir=$NNPACKAGE_CACHE_DIR
-fi
+$INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK
INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
MD5_CHECK="on"
-TFLITE_LOADER="nnapi"
+TFLITE_LOADER="loader"
REPORT_DIR="report"
TEST_LIST_FILE=
TEST_DRIVER=nnapi_test
elif [[ $TFLITE_LOADER == "loader" ]]; then
TEST_NAME="Loader Verification"
- TEST_DRIVER=tflite_loader_test_tool
+ TEST_DRIVER=tflite_comparator
else
Usage
exit 1
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-custom
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum/float
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-floor
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-logistic
-max
-max_pool_2d
-mean
-min
-mul
-one_hot
-pack
-pad
-reduce_max
-reduce_mean
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_depth
-sqrt
-squeeze
-strided_slice
-sub
-tanh
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-equal
-exp
-fullyconnected
-greater
-greater_equal
-less
-less_equal
-logistic
-max
-max_pool_2d
-min
-mul
-neg
-not_equal
-one_hot
-pack
-reduce_max
-reduce_sum
-reshape/reshape1
-select
-softmax
-squeeze
-sub
-tanh
-tile
-transpose
-zeros_like
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-custom
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum/float
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-logistic
-max
-max_pool_2d
-mean
-min
-mul
-one_hot
-pack
-pad
-reduce_max
-reduce_mean
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_depth
-sqrt
-squeeze
-strided_slice
-sub
-tanh
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-MODELS/mobilenet_quant8
-abs
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-equal
-exp
-fullyconnected
-greater
-greater_equal
-less
-less_equal
-logistic
-max
-max_pool_2d
-mean
-min
-mul
-neg
-not_equal
-one_hot
-pack
-reduce_max
-reduce_sum
-reshape/reshape1
-rsqrt
-select
-shape
-sin
-slice
-strided_slice
-softmax
-squeeze
-sub
-tanh
-tile
-transpose
-zeros_like
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected/fc1
-logistic
-max_pool_2d
-pad
-relu
-relu6
-reshape/reshape1
-softmax
-tanh
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-MODELS/mobilenet_quant8
-add
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected
-logistic
-max_pool_2d
-mean
-reduce_max
-reduce_sum
-reshape/reshape1
-select
-softmax
-squeeze
-tile
-transpose
-zeros_like
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+custom
+depthwise_conv_2d
+div
+embedding_lookup
+exp
+floor
+fullyconnected
+gather
+hashtable_lookup
+l2_normalization
+l2_pool_2d
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum/float
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+topk_v2
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+custom
+depthwise_conv_2d
+div
+embedding_lookup
+exp
+floor
+fullyconnected
+gather
+hashtable_lookup
+l2_normalization
+l2_pool_2d
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum/float
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+topk_v2
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+exp
+floor
+fullyconnected
+gather
+l2_normalization
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum/float
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+floor
+gather
+l2_normalization
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+one_hot
+pack
+pad
+reduce_max
+reduce_mean
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_depth
+sqrt
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+equal
+exp
+fullyconnected
+greater
+greater_equal
+less
+less_equal
+logistic
+max
+max_pool_2d
+min
+mul
+neg
+not_equal
+one_hot
+pack
+reduce_max
+reduce_sum
+reshape/reshape1
+select
+softmax
+squeeze
+sub
+tanh
+tile
+transpose
+zeros_like
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+exp
+floor
+fullyconnected
+gather
+l2_normalization
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum/float
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+floor
+fullyconnected
+gather
+l2_normalization
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+one_hot
+pack
+pad
+reduce_max
+reduce_mean
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_depth
+sqrt
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+MODELS/mobilenet_quant8
+abs
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+equal
+exp
+fullyconnected
+greater
+greater_equal
+less
+less_equal
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+neg
+not_equal
+one_hot
+pack
+reduce_max
+reduce_sum
+reshape/reshape1
+rsqrt
+select
+shape
+sin
+slice
+strided_slice
+softmax
+squeeze
+sub
+tanh
+tile
+transpose
+zeros_like
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+concat
+conv_2d
+depthwise_conv_2d
+fullyconnected/fc1
+logistic
+max_pool_2d
+pad
+relu
+relu6
+reshape/reshape1
+softmax
+tanh
--- /dev/null
+MODELS/inception_module
+MODELS/mobilenet
+MODELS/mobilenet_quant8
+add
+average_pool_2d
+concat
+conv_2d
+depthwise_conv_2d
+fullyconnected
+logistic
+max_pool_2d
+mean
+reduce_max
+reduce_sum
+reshape/reshape1
+select
+softmax
+squeeze
+tile
+transpose
+zeros_like
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-concat
-conv_2d/convolution1
-depthwise_conv_2d
-div
-exp
-fullyconnected/fc1
-logistic
-max
-max_pool_2d/maxpool1
-mean
-min
-mul
-pack
-pad
-reduce_max
-reduce_sum/float
-relu
-relu6
-reshape/reshape1
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-sqrt
-squeeze
-sub
-tanh
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-concat
-conv_2d/convolution1
-depthwise_conv_2d
-div
-exp
-fullyconnected/fc1
-logistic
-max
-max_pool_2d/maxpool1
-mean
-min
-mul
-pack
-pad
-reduce_max
-reduce_sum/float
-relu
-relu6
-reshape/reshape1
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-sqrt
-squeeze
-sub
-tanh
-transpose
-transpose_conv
+++ /dev/null
-MODELFILE_NAME="add.zip"
+++ /dev/null
-MODELFILE_NAME="add_invalid_manifest.zip"
+++ /dev/null
-MODELFILE_NAME="add_no_manifest.zip"
+++ /dev/null
-MODELFILE_NAME="if_dynamic.zip"
+++ /dev/null
-MODELFILE_NAME="while_dynamic.zip"
private:
void *data_;
};
-} // end of namespace
+} // namespace nnpkg_run
#endif // __NNPACKAGE_RUN_ALLOCATION_H__
throw std::runtime_error(
"model input type is qasymm8, bool or uint8. But h5 data type is different.");
break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ if (type == H5::PredType::STD_I8BE || type == H5::PredType::STD_I8LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT8);
+ else
+ throw std::runtime_error("model input type is int8. But h5 data type is different.");
+ break;
default:
throw std::runtime_error("nnpkg_run can load f32, i32, qasymm8, bool and uint8.");
}
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
break;
}
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
+ break;
+ }
default:
throw std::runtime_error("nnpkg_run can dump f32, i32, qasymm8, bool and uint8.");
}
private:
nnfw_session *session_;
};
-} // end of namespace
+} // namespace nnpkg_run
#endif // __NNPACKAGE_RUN_H5FORMATTER_H__
sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */
sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
-
+ sizeof(int8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
};
return elmsize[ti->dtype] * num_elems(ti);
}
-} // end of namespace
+} // namespace nnpkg_run
nnfw_tensorinfo ti;
NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
- if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
+ if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED)
{
std::cerr << "E: not supported input type" << std::endl;
exit(-1);
nnfw_tensorinfo ti;
NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
- if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
+ if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED)
{
std::cerr << "E: not supported output type" << std::endl;
exit(-1);
private:
nnfw_session *session_;
};
-} // end of namespace
+} // namespace nnpkg_run
#endif // __NNPACKAGE_RUN_RANDOMGEN_H__
--- /dev/null
+if(NOT BUILD_TFLITE_COMPARATOR_TEST_TOOL)
+ message("skipping tflite comparator tool build")
+ return()
+endif(NOT BUILD_TFLITE_COMPARATOR_TEST_TOOL)
+
+if(NOT BUILD_ONERT)
+ message("skipping tflite comparator tool build: onert is not built")
+ return()
+endif(NOT BUILD_ONERT)
+
+list(APPEND SOURCES "src/tflite_comparator.cc")
+list(APPEND SOURCES "src/args.cc")
+
+nnfw_find_package(Boost REQUIRED program_options system filesystem)
+
+add_executable(tflite_comparator ${SOURCES})
+target_include_directories(tflite_comparator PRIVATE ${Boost_INCLUDE_DIRS})
+
+target_link_libraries(tflite_comparator nnfw-dev)
+target_link_libraries(tflite_comparator nnfw_lib_tflite nnfw_lib_misc)
+target_link_libraries(tflite_comparator ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
+
+install(TARGETS tflite_comparator DESTINATION bin)
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <iostream>
+
+#include <boost/filesystem.hpp>
+
+namespace TFLiteRun
+{
+
+Args::Args(const int argc, char **argv) noexcept
+{
+ Initialize();
+ Parse(argc, argv);
+}
+
+void Args::Initialize(void)
+{
+ // General options
+ po::options_description general("General options");
+
+ // clang-format off
+ general.add_options()
+ ("help,h", "Display available options")
+ ("tflite", po::value<std::string>()->default_value("")->required(), "Input tflite model file for serialization")
+ ("data,d", po::value<std::vector<std::string>>()->multitoken()->default_value(std::vector<std::string>{}, ""), "Input data file for model");
+ // clang-format on
+
+ _options.add(general);
+ _positional.add("tflite", 1);
+}
+
+void Args::print(char **argv)
+{
+ std::cout << "tflite_comparator" << std::endl << std::endl;
+ std::cout << "Load tflite model by onert and TFLite, and compare their output" << std::endl;
+ std::cout << "Usage:" << std::endl;
+ std::cout << argv[0] << " --tflite model_file.tflite --data input_data.dat" << std::endl;
+ std::cout << _options;
+ std::cout << std::endl;
+}
+
+void Args::Parse(const int argc, char **argv)
+{
+ po::variables_map vm;
+ po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+ vm);
+ po::notify(vm);
+
+ if (vm.count("help"))
+ {
+ print(argv);
+
+ exit(0);
+ }
+
+ try
+ {
+ if (vm.count("tflite"))
+ {
+ _tflite_filename = vm["tflite"].as<std::string>();
+ }
+
+ if (vm.count("data"))
+ {
+ _data_filenames = vm["data"].as<std::vector<std::string>>();
+ }
+ }
+ catch (const std::bad_cast &e)
+ {
+ std::cerr << e.what() << '\n';
+ print(argv);
+ exit(1);
+ }
+}
+
+} // namespace TFLiteRun
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
+#define __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
+
+#include <string>
+#include <boost/program_options.hpp>
+
+namespace po = boost::program_options;
+
+namespace TFLiteRun
+{
+
+class Args
+{
+public:
+ Args(const int argc, char **argv) noexcept;
+ void print(char **argv);
+
+ const std::string &getTFLiteFilename(void) const { return _tflite_filename; }
+ const std::vector<std::string> &getDataFilenames(void) const { return _data_filenames; }
+
+private:
+ void Initialize();
+ void Parse(const int argc, char **argv);
+
+private:
+ po::options_description _options;
+ po::positional_options_description _positional;
+
+ std::string _tflite_filename;
+ std::vector<std::string> _data_filenames;
+};
+
+} // namespace TFLiteRun
+
+#endif // __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <nnfw_experimental.h>
+#include <nnfw_internal.h>
+
+#include <misc/EnvVar.h>
+#include <misc/fp32.h>
+#include <misc/RandomGenerator.h>
+
+#include <tflite/Assert.h>
+#include <tflite/InterpreterSession.h>
+#include <tflite/ext/kernels/register.h>
+
+#include <iostream>
+#include <fstream>
+#include <memory>
+
+const int RUN_FAILED = 1;
+
+using namespace tflite;
+using namespace nnfw::tflite;
+
+const int FILE_ERROR = 2;
+
+#define NNFW_ASSERT_FAIL(expr, msg) \
+ if ((expr) != NNFW_STATUS_NO_ERROR) \
+ { \
+ std::cerr << msg << std::endl; \
+ exit(-1); \
+ }
+
+// Read raw bytes from the selected file into the destination buffer
+void readData(const std::string &path, std::vector<uint8_t> &dest)
+{
+ std::ifstream in(path);
+ if (!in.good())
+ {
+ std::cerr << "can not open data file " << path << "\n";
+ exit(FILE_ERROR);
+ }
+ in.seekg(0, std::ifstream::end);
+ size_t len = in.tellg();
+ in.seekg(0, std::ifstream::beg);
+
+ assert(dest.size() == len);
+ in.read(reinterpret_cast<char *>(dest.data()), len);
+}
+
+template <typename T>
+void randomData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
+{
+ size_t elements = dest.size() / sizeof(T);
+ assert(dest.size() % sizeof(T) == 0);
+
+ std::vector<T> vec(elements);
+ for (uint64_t i = 0; i < elements; i++)
+ {
+ vec[i] = randgen.generate<T>();
+ }
+ memcpy(dest.data(), vec.data(), elements * sizeof(T));
+}
+
+void randomBoolData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
+{
+ size_t elements = dest.size();
+ std::vector<uint8_t> vec(elements);
+ for (uint64_t i = 0; i < elements; i++)
+ {
+ bool value = randgen.generate<bool>();
+ dest[i] = value ? 1 : 0;
+ }
+}
+
+inline uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+ uint64_t n = 1;
+ for (uint32_t i = 0; i < ti->rank; ++i)
+ {
+ n *= ti->dims[i];
+ }
+ return n;
+}
+
+inline size_t sizeOfNnfwType(NNFW_TYPE type)
+{
+ switch (type)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ return 1;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ case NNFW_TYPE_TENSOR_INT32:
+ return 4;
+ case NNFW_TYPE_TENSOR_INT64:
+ return 8;
+ default:
+ throw std::runtime_error{"Invalid tensor type"};
+ }
+}
+
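+// Element-wise exact comparison of a reference buffer against an actual output buffer;
+// logs every mismatching element and returns true only if all elements match.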
+template <typename T>
+bool compareBuffersExact(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+{
+ bool match = true;
+ for (uint32_t e = 0; e < act_buf.size() / sizeof(T); e++)
+ {
+ T ref = ref_buf[e];
+ T act = reinterpret_cast<const T *>(act_buf.data())[e];
+
+ if (ref != act)
+ {
+ std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+ << ", act: " << act << std::endl;
+ match = false;
+ }
+ }
+
+ return match;
+}
+
+bool compareBuffersExactBool(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf,
+ uint32_t index)
+{
+ bool match = true;
+ for (uint32_t e = 0; e < act_buf.size() / sizeof(uint8_t); e++)
+ {
+ uint8_t ref_raw = ref_buf[e];
+ bool ref = (ref_raw != 0 ? true : false);
+ uint8_t act_raw = reinterpret_cast<const uint8_t *>(act_buf.data())[e];
+ bool act = (act_raw != 0 ? true : false);
+ if (ref != act)
+ {
+ std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+ << ", act: " << act << std::endl;
+ match = false;
+ }
+ }
+
+ return match;
+}
+
+int main(const int argc, char **argv)
+{
+ TFLiteRun::Args args(argc, argv);
+
+ auto tflite_file = args.getTFLiteFilename();
+ auto data_files = args.getDataFilenames();
+
+ if (tflite_file.empty())
+ {
+ args.print(argv);
+ return RUN_FAILED;
+ }
+
+ std::cout << "[Execution] Stage start!" << std::endl;
+ // Loading
+ nnfw_session *onert_session = nullptr;
+ NNFW_ASSERT_FAIL(nnfw_create_session(&onert_session), "[ ERROR ] Failure during model load");
+ if (onert_session == nullptr)
+ {
+ std::cerr << "[ ERROR ] Failure to open session" << std::endl;
+ exit(-1);
+ }
+
+ NNFW_ASSERT_FAIL(nnfw_load_model_from_modelfile(onert_session, tflite_file.c_str()),
+ "[ ERROR ] Failure during model load");
+
+ uint32_t num_inputs;
+ uint32_t num_outputs;
+ NNFW_ASSERT_FAIL(nnfw_input_size(onert_session, &num_inputs),
+ "[ ERROR ] Failure during get model inputs");
+ NNFW_ASSERT_FAIL(nnfw_output_size(onert_session, &num_outputs),
+ "[ ERROR ] Failure during get model outputs");
+
+ std::cout << "[Execution] Model is deserialized!" << std::endl;
+
+ // Compile
+ nnfw_prepare(onert_session);
+
+ std::cout << "[Execution] Model compiled!" << std::endl;
+
+ // Prepare input/output data
+ std::vector<std::vector<uint8_t>> inputs(num_inputs);
+ std::vector<std::vector<uint8_t>> outputs(num_outputs);
+
+ bool generate_data = data_files.empty();
+ bool read_data = data_files.size() == num_inputs;
+ if (!generate_data && !read_data)
+ {
+ std::cerr << "[ ERROR ] "
+ << "Wrong number of input files." << std::endl;
+ exit(1);
+ }
+
+ const int seed = 1; /* TODO Add an option for seed value */
+ nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
+
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ nnfw_tensorinfo ti_input;
+ NNFW_ASSERT_FAIL(nnfw_input_tensorinfo(onert_session, i, &ti_input),
+ "[ ERROR ] Failure during get input data info");
+ size_t input_size = num_elems(&ti_input) * sizeOfNnfwType(ti_input.dtype);
+
+ inputs[i].resize(input_size);
+
+ if (generate_data)
+ {
+ switch (ti_input.dtype)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ randomBoolData(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ randomData<uint8_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ randomData<int8_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ randomData<float>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ randomData<int32_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ randomData<uint64_t>(randgen, inputs[i]);
+ break;
+ default:
+ std::cerr << "[ ERROR ] "
+                  << "Unsupported input data type" << std::endl;
+ exit(-1);
+ break;
+ }
+ }
+ else /* read_data */
+ readData(data_files[i], inputs[i]);
+
+ NNFW_ASSERT_FAIL(nnfw_set_input(onert_session, i, ti_input.dtype, inputs[i].data(), input_size),
+ "[ ERROR ] Failure to set input tensor buffer");
+ }
+
+ std::cout << "[Execution] Input data is defined!" << std::endl;
+
+ for (uint32_t i = 0; i < num_outputs; i++)
+ {
+ nnfw_tensorinfo ti_output;
+ NNFW_ASSERT_FAIL(nnfw_output_tensorinfo(onert_session, i, &ti_output),
+ "[ ERROR ] Failure during get output tensor info");
+
+ uint64_t output_elements = num_elems(&ti_output);
+ size_t output_size = output_elements * sizeOfNnfwType(ti_output.dtype);
+ outputs[i].resize(output_size);
+
+ NNFW_ASSERT_FAIL(
+ nnfw_set_output(onert_session, i, ti_output.dtype, outputs[i].data(), output_size),
+ "[ ERROR ] Failure to set output tensor buffer");
+ }
+
+ // Execute
+ NNFW_ASSERT_FAIL(nnfw_run(onert_session), "[Execution] Can't execute");
+
+ std::cout << "[Execution] Done!" << std::endl;
+
+ // Compare with tflite
+ std::cout << "[Comparison] Stage start!" << std::endl;
+ // Read tflite model
+ StderrReporter error_reporter;
+ auto model = FlatBufferModel::BuildFromFile(tflite_file.c_str(), &error_reporter);
+
+ BuiltinOpResolver resolver;
+ InterpreterBuilder builder(*model, resolver);
+
+ std::unique_ptr<Interpreter> interpreter;
+ try
+ {
+ TFLITE_ENSURE(builder(&interpreter));
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ exit(FILE_ERROR);
+ }
+ interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(1));
+
+ auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+ sess->prepare();
+ // Set input and run
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ auto input_tensor = interpreter->tensor(interpreter->inputs().at(i));
+ memcpy(input_tensor->data.uint8, inputs[i].data(), inputs[i].size());
+ }
+ if (!sess->run())
+ {
+ std::cout << "[Comparison] TFLite run failed!" << std::endl;
+ assert(0 && "Run failed!");
+ }
+ std::cout << "[Comparison] TFLite run done!" << std::endl;
+
+ // Calculate max difference over all outputs
+ float max_float_difference = 0.0f;
+ bool find_unmatched_output = false;
+ auto tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
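+  // TOLERANCE (default 1) relaxes the relative-epsilon comparison applied to
+  // FLOAT32 outputs below.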
+
+ for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++)
+ {
+ nnfw_tensorinfo ti;
+ nnfw_output_tensorinfo(onert_session, out_idx, &ti);
+
+ bool matched = true;
+ // Check output tensor values
+
+ const auto &ref_output = interpreter->tensor(interpreter->outputs().at(out_idx))->data;
+ const auto &output = outputs[out_idx];
+
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ matched = compareBuffersExactBool(ref_output.uint8, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ matched = compareBuffersExact<uint8_t>(ref_output.uint8, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ matched = compareBuffersExact<int8_t>(ref_output.int8, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ matched = compareBuffersExact<int32_t>(ref_output.i32, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ // TODO better way for handling FP error?
+ for (uint32_t e = 0; e < num_elems(&ti); e++)
+ {
+ float refval = ref_output.f[e];
+ float val = reinterpret_cast<const float *>(output.data())[e];
+ if (std::abs(refval - val) > max_float_difference)
+ max_float_difference = std::abs(refval - val);
+
+          if (!nnfw::misc::fp32::absolute_epsilon_equal(refval, val) &&
+              !nnfw::misc::fp32::epsilon_equal(refval, val, tolerance))
+            matched = false;
+ }
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ matched = compareBuffersExact<int64_t>(ref_output.i64, output, out_idx);
+ break;
+ default:
+ throw std::runtime_error{"Invalid tensor type"};
+ }
+
+ if (!matched)
+ find_unmatched_output = true;
+ }
+
+ // Print results
+ std::cout << "[Comparison] Max float difference: " << max_float_difference << std::endl;
+ int ret = 0;
+ if (find_unmatched_output)
+ {
+    std::cout << "[Comparison] Outputs are not equal!" << std::endl;
+ ret = 1;
+ }
+ else
+ {
+    std::cout << "[Comparison] Outputs are equal!" << std::endl;
+ }
+ std::cout << "[Comparison] Done!" << std::endl;
+
+ nnfw_close_session(onert_session);
+
+ return ret;
+}
+++ /dev/null
-if(NOT BUILD_TFLITE_LOADER_TEST_TOOL)
- message("skipping tflite loader tool build")
- return()
-endif(NOT BUILD_TFLITE_LOADER_TEST_TOOL)
-
-if(NOT BUILD_ONERT)
- message("skipping tflite loader tool build: onert is not built")
- return()
-endif(NOT BUILD_ONERT)
-
-list(APPEND SOURCES "src/tflite_loader.cc")
-list(APPEND SOURCES "src/args.cc")
-
-nnfw_find_package(Boost REQUIRED program_options system filesystem)
-
-add_executable(tflite_loader_test_tool ${SOURCES})
-target_include_directories(tflite_loader_test_tool PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(tflite_loader_test_tool nnfw-dev)
-target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite nnfw_lib_misc)
-target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
-
-install(TARGETS tflite_loader_test_tool DESTINATION bin)
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <iostream>
-
-#include <boost/filesystem.hpp>
-
-namespace TFLiteRun
-{
-
-Args::Args(const int argc, char **argv) noexcept
-{
- Initialize();
- Parse(argc, argv);
-}
-
-void Args::Initialize(void)
-{
- // General options
- po::options_description general("General options");
-
- // clang-format off
- general.add_options()
- ("help,h", "Display available options")
- ("tflite", po::value<std::string>()->default_value("")->required(), "Input tflite model file for serialization")
- ("data,d", po::value<std::vector<std::string>>()->multitoken()->default_value(std::vector<std::string>{}, ""), "Input data file for model");
- // clang-format on
-
- _options.add(general);
- _positional.add("tflite", 1);
-}
-
-void Args::print(char **argv)
-{
- std::cout << "tflite_loader" << std::endl << std::endl;
- std::cout << "Load tflite model by Loader and TFLite and compare their output" << std::endl;
- std::cout << "Usage:" << std::endl;
- std::cout << argv[0] << " --tflite model_file.tflite --data input_data.dat" << std::endl;
- std::cout << _options;
- std::cout << std::endl;
-}
-
-void Args::Parse(const int argc, char **argv)
-{
- po::variables_map vm;
- po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
- vm);
- po::notify(vm);
-
- if (vm.count("help"))
- {
- print(argv);
-
- exit(0);
- }
-
- try
- {
- if (vm.count("tflite"))
- {
- _tflite_filename = vm["tflite"].as<std::string>();
- }
-
- if (vm.count("data"))
- {
- _data_filenames = vm["data"].as<std::vector<std::string>>();
- }
- }
- catch (const std::bad_cast &e)
- {
- std::cerr << e.what() << '\n';
- print(argv);
- exit(1);
- }
-}
-
-} // end of namespace TFLiteRun
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
-#define __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
-
-#include <string>
-#include <boost/program_options.hpp>
-
-namespace po = boost::program_options;
-
-namespace TFLiteRun
-{
-
-class Args
-{
-public:
- Args(const int argc, char **argv) noexcept;
- void print(char **argv);
-
- const std::string &getTFLiteFilename(void) const { return _tflite_filename; }
- const std::vector<std::string> &getDataFilenames(void) const { return _data_filenames; }
-
-private:
- void Initialize();
- void Parse(const int argc, char **argv);
-
-private:
- po::options_description _options;
- po::positional_options_description _positional;
-
- std::string _tflite_filename;
- std::vector<std::string> _data_filenames;
-};
-
-} // namespace TFLiteRun
-
-#endif // __TFLITE_LOADER_TOOLS_SRC_ARGS_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <nnfw_experimental.h>
-#include <nnfw_internal.h>
-
-#include <misc/EnvVar.h>
-#include <misc/RandomGenerator.h>
-
-#include <tflite/Assert.h>
-#include <tflite/InterpreterSession.h>
-#include <tflite/ext/kernels/register.h>
-
-#include <iostream>
-#include <fstream>
-#include <memory>
-
-const int RUN_FAILED = 1;
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-const int FILE_ERROR = 2;
-const float DIFFERENCE_THRESHOLD = 10e-5;
-
-#define NNFW_ASSERT_FAIL(expr, msg) \
- if ((expr) != NNFW_STATUS_NO_ERROR) \
- { \
- std::cerr << msg << std::endl; \
- exit(-1); \
- }
-
-// Read vector of floats from selected file
-void readData(const string &path, std::vector<uint8_t> &dest)
-{
- std::ifstream in(path);
- if (!in.good())
- {
- std::cerr << "can not open data file " << path << "\n";
- exit(FILE_ERROR);
- }
- in.seekg(0, std::ifstream::end);
- size_t len = in.tellg();
- in.seekg(0, std::ifstream::beg);
-
- assert(dest.size() == len);
- in.read(reinterpret_cast<char *>(dest.data()), len);
-}
-
-template <typename T>
-void randomData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
-{
- size_t elements = dest.size() / sizeof(T);
- assert(dest.size() % sizeof(T) == 0);
-
- std::vector<T> vec(elements);
- for (uint64_t i = 0; i < elements; i++)
- {
- vec[i] = randgen.generate<T>();
- }
- memcpy(dest.data(), vec.data(), elements * sizeof(T));
-}
-
-void randomBoolData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
-{
- size_t elements = dest.size();
- std::vector<uint8_t> vec(elements);
- for (uint64_t i = 0; i < elements; i++)
- {
- bool value = randgen.generate<bool>();
- dest[i] = value ? 1 : 0;
- }
-}
-
-inline uint64_t num_elems(const nnfw_tensorinfo *ti)
-{
- uint64_t n = 1;
- for (uint32_t i = 0; i < ti->rank; ++i)
- {
- n *= ti->dims[i];
- }
- return n;
-}
-
-inline size_t sizeOfNnfwType(NNFW_TYPE type)
-{
- switch (type)
- {
- case NNFW_TYPE_TENSOR_BOOL:
- case NNFW_TYPE_TENSOR_UINT8:
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
- return 1;
- case NNFW_TYPE_TENSOR_FLOAT32:
- case NNFW_TYPE_TENSOR_INT32:
- return 4;
- case NNFW_TYPE_TENSOR_INT64:
- return 8;
- default:
- throw std::runtime_error{"Invalid tensor type"};
- }
-}
-
-template <typename T>
-bool compareBuffersExact(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
-{
- bool match = true;
- for (uint32_t e = 0; e < act_buf.size() / sizeof(T); e++)
- {
- T ref = ref_buf[e];
- T act = reinterpret_cast<const T *>(act_buf.data())[e];
-
- if (ref != act)
- {
- std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
- << ", act: " << act << std::endl;
- match = false;
- }
- }
-
- return match;
-}
-
-bool compareBuffersExactBool(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf,
- uint32_t index)
-{
- bool match = true;
- for (uint32_t e = 0; e < act_buf.size() / sizeof(uint8_t); e++)
- {
- uint8_t ref_raw = ref_buf[e];
- bool ref = (ref_raw != 0 ? true : false);
- uint8_t act_raw = reinterpret_cast<const uint8_t *>(act_buf.data())[e];
- bool act = (act_raw != 0 ? true : false);
- if (ref != act)
- {
- std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
- << ", act: " << act << std::endl;
- match = false;
- }
- }
-
- return match;
-}
-
-int main(const int argc, char **argv)
-{
- TFLiteRun::Args args(argc, argv);
-
- auto tflite_file = args.getTFLiteFilename();
- auto data_files = args.getDataFilenames();
-
- if (tflite_file.empty())
- {
- args.print(argv);
- return RUN_FAILED;
- }
-
- std::cout << "[Execution] Stage start!" << std::endl;
- // Loading
- nnfw_session *onert_session = nullptr;
- NNFW_ASSERT_FAIL(nnfw_create_session(&onert_session), "[ ERROR ] Failure during model load");
- if (onert_session == nullptr)
- {
- std::cerr << "[ ERROR ] Failure to open session" << std::endl;
- exit(-1);
- }
-
- NNFW_ASSERT_FAIL(nnfw_load_model_from_modelfile(onert_session, tflite_file.c_str()),
- "[ ERROR ] Failure during model load");
-
- uint32_t num_inputs;
- uint32_t num_outputs;
- NNFW_ASSERT_FAIL(nnfw_input_size(onert_session, &num_inputs),
- "[ ERROR ] Failure during get model inputs");
- NNFW_ASSERT_FAIL(nnfw_output_size(onert_session, &num_outputs),
- "[ ERROR ] Failure during get model outputs");
-
- std::cout << "[Execution] Model is deserialized!" << std::endl;
-
- // Compile
- nnfw_prepare(onert_session);
-
- std::cout << "[Execution] Model compiled!" << std::endl;
-
- // Prepare input/output data
- std::vector<std::vector<uint8_t>> inputs(num_inputs);
- std::vector<std::vector<uint8_t>> outputs(num_outputs);
-
- bool generate_data = data_files.empty();
- bool read_data = data_files.size() == num_inputs;
- if (!generate_data && !read_data)
- {
- std::cerr << "[ ERROR ] "
- << "Wrong number of input files." << std::endl;
- exit(1);
- }
-
- const int seed = 1; /* TODO Add an option for seed value */
- nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
-
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- nnfw_tensorinfo ti_input;
- NNFW_ASSERT_FAIL(nnfw_input_tensorinfo(onert_session, i, &ti_input),
- "[ ERROR ] Failure during get input data info");
- size_t input_size = num_elems(&ti_input) * sizeOfNnfwType(ti_input.dtype);
-
- inputs[i].resize(input_size);
-
- if (generate_data)
- {
- switch (ti_input.dtype)
- {
- case NNFW_TYPE_TENSOR_BOOL:
- randomBoolData(randgen, inputs[i]);
- break;
- case NNFW_TYPE_TENSOR_UINT8:
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- randomData<uint8_t>(randgen, inputs[i]);
- break;
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
- randomData<int8_t>(randgen, inputs[i]);
- break;
- case NNFW_TYPE_TENSOR_FLOAT32:
- randomData<float>(randgen, inputs[i]);
- break;
- case NNFW_TYPE_TENSOR_INT32:
- randomData<int32_t>(randgen, inputs[i]);
- break;
- case NNFW_TYPE_TENSOR_INT64:
- randomData<uint64_t>(randgen, inputs[i]);
- break;
- default:
- std::cerr << "[ ERROR ] "
- << "Unspported input data type" << std::endl;
- exit(-1);
- break;
- }
- }
- else /* read_data */
- readData(data_files[i], inputs[i]);
-
- NNFW_ASSERT_FAIL(nnfw_set_input(onert_session, i, ti_input.dtype, inputs[i].data(), input_size),
- "[ ERROR ] Failure to set input tensor buffer");
- }
-
- std::cout << "[Execution] Input data is defined!" << std::endl;
-
- for (uint32_t i = 0; i < num_outputs; i++)
- {
- nnfw_tensorinfo ti_output;
- NNFW_ASSERT_FAIL(nnfw_output_tensorinfo(onert_session, i, &ti_output),
- "[ ERROR ] Failure during get output tensor info");
-
- uint64_t output_elements = num_elems(&ti_output);
- size_t output_size = output_elements * sizeOfNnfwType(ti_output.dtype);
- outputs[i].resize(output_size);
-
- NNFW_ASSERT_FAIL(
- nnfw_set_output(onert_session, i, ti_output.dtype, outputs[i].data(), output_size),
- "[ ERROR ] Failure to set output tensor buffer");
- }
-
- // Execute
- NNFW_ASSERT_FAIL(nnfw_run(onert_session), "[Execution] Can't execute");
-
- std::cout << "[Execution] Done!" << std::endl;
-
- // Compare with tflite
- std::cout << "[Comparison] Stage start!" << std::endl;
- // Read tflite model
- StderrReporter error_reporter;
- auto model = FlatBufferModel::BuildFromFile(tflite_file.c_str(), &error_reporter);
-
- BuiltinOpResolver resolver;
- InterpreterBuilder builder(*model, resolver);
-
- std::unique_ptr<Interpreter> interpreter;
- try
- {
- TFLITE_ENSURE(builder(&interpreter));
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- exit(FILE_ERROR);
- }
- interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
-
- auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
- sess->prepare();
- // Set input and run
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- auto input_tensor = interpreter->tensor(interpreter->inputs().at(i));
- memcpy(input_tensor->data.uint8, inputs[i].data(), inputs[i].size());
- }
- if (!sess->run())
- {
- std::cout << "[Comparison] TFLite run failed!" << std::endl;
- assert(0 && "Run failed!");
- }
- std::cout << "[Comparison] TFLite run done!" << std::endl;
-
- // Calculate max difference over all outputs
- float max_float_difference = 0.0f;
- bool find_unmatched_output = false;
-
- for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++)
- {
- nnfw_tensorinfo ti;
- nnfw_output_tensorinfo(onert_session, out_idx, &ti);
-
- bool matched = true;
- // Check output tensor values
-
- const auto &ref_output = interpreter->tensor(interpreter->outputs().at(out_idx))->data;
- const auto &output = outputs[out_idx];
-
- switch (ti.dtype)
- {
- case NNFW_TYPE_TENSOR_BOOL:
- matched = compareBuffersExactBool(ref_output.uint8, output, out_idx);
- break;
- case NNFW_TYPE_TENSOR_UINT8:
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- matched = compareBuffersExact<uint8_t>(ref_output.uint8, output, out_idx);
- break;
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
- matched = compareBuffersExact<int8_t>(ref_output.int8, output, out_idx);
- break;
- case NNFW_TYPE_TENSOR_INT32:
- matched = compareBuffersExact<int32_t>(ref_output.i32, output, out_idx);
- break;
- case NNFW_TYPE_TENSOR_FLOAT32:
- // TODO better way for handling FP error?
- for (uint32_t e = 0; e < num_elems(&ti); e++)
- {
- float refval = ref_output.f[e];
- float val = reinterpret_cast<const float *>(output.data())[e];
- if (std::abs(refval - val) > max_float_difference)
- max_float_difference = std::abs(refval - val);
-
- if (max_float_difference > DIFFERENCE_THRESHOLD)
- matched = false;
- }
- break;
- case NNFW_TYPE_TENSOR_INT64:
- matched = compareBuffersExact<int64_t>(ref_output.i64, output, out_idx);
- break;
- default:
- throw std::runtime_error{"Invalid tensor type"};
- }
-
- if (!matched)
- find_unmatched_output = true;
- }
-
- // Print results
- std::cout << "[Comparison] Max float difference: " << max_float_difference << std::endl;
- int ret = 0;
- if (find_unmatched_output)
- {
- std::cout << "[Comparison] outputs is not equal!" << std::endl;
- if (max_float_difference > DIFFERENCE_THRESHOLD)
- {
- std::cout << "[Comparison] Float outputs is not equal!" << std::endl;
- }
- ret = 1;
- }
- else
- {
- std::cout << "[Comparison] Outputs is equal!" << std::endl;
- }
- std::cout << "[Comparison] Done!" << std::endl;
-
- nnfw_close_session(onert_session);
-
- return ret;
-}
#include "tflite/Diff.h"
#include "tflite/Assert.h"
#include "tflite/Session.h"
+#include "tflite/RandomInputInitializer.h"
#include "tflite/InterpreterSession.h"
#include "tflite/NNAPISession.h"
#include "misc/tensor/IndexIterator.h"
}
};
-} // namespace anonymous
+} // namespace
int main(const int argc, char **argv)
{
BuiltinOpResolver resolver;
InterpreterBuilder builder(*model, resolver);
TFLITE_ENSURE(builder(&interpreter))
- interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
+ interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(1));
});
}
catch (const std::exception &e)
const int seed = 1; /* TODO Add an option for seed value */
nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
- // No input specified. So we fill the input tensors with random values.
- for (const auto &o : interpreter->inputs())
- {
- TfLiteTensor *tensor = interpreter->tensor(o);
- if (tensor->type == kTfLiteInt32)
- {
- // Generate singed 32-bit integer (s32) input
- auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, o);
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- // Gather operation: index should be within input coverage.
- tensor_view.at(ind) = value;
- value++;
- };
- }
- else if (tensor->type == kTfLiteUInt8)
- {
- // Generate unsigned 8-bit integer input
- auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);
-
- auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
- const nnfw::misc::tensor::Object<uint8_t> data(tensor_view.shape(),
- std::bind(fp, randgen, _1, _2));
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
- }
- else if (tensor->type == kTfLiteBool)
- {
- // Generate bool input
- auto tensor_view = nnfw::tflite::TensorView<bool>::make(*interpreter, o);
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
- std::bind(fp, randgen, _1, _2));
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
- }
- else
- {
- assert(tensor->type == kTfLiteFloat32);
-
- const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
- for (float *ptr = tensor->data.f; ptr < end; ptr++)
- {
- *ptr = randgen.generate<float>();
- }
- }
- }
+ RandomInputInitializer initializer{randgen};
+ initializer.run(*(interpreter.get()));
}
TFLiteRun::TensorDumper tensor_dumper;
set(BUILD_TENSORFLOW_LITE_2_3_0 ON)
endif()
-nnfw_find_package(TensorFlowLite-2.3.0 REQUIRED)
+nnfw_find_package(TensorFlowLite EXACT 2.3.0 REQUIRED)
nnfw_find_package(Boost REQUIRED)
list(APPEND TFLITE_RUN_SRCS "src/tflite_vanilla_run.cc")
}
};
-} // namespace anonymous
+} // namespace
int main(const int argc, char **argv)
{
+++ /dev/null
-../.clang-format.8
\ No newline at end of file
+++ /dev/null
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
+++ /dev/null
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
{
echo "Usage: $0 [BuildArch] [LinuxCodeName] [--setproxy=IP] [--skipunmount]"
echo "BuildArch can be: arm(default), aarch64 and armel"
- echo "LinuxCodeName - optional, Code name for Linux, can be: bionic(default), trusty, xenial, focal"
+ echo "LinuxCodeName - optional, Code name for Linux, can be: xenial, bionic(default), focal"
echo " If BuildArch is armel, this can be tizen(default)"
echo "--setproxy=IP - optional, IP is the proxy server IP address or url with portnumber"
echo " default no proxy. Example: --setproxy=127.1.2.3:8080"
__UbuntuRepo=
__LinuxCodeName=
;;
- trusty)
- __LinuxCodeName=trusty
- ;;
xenial)
__LinuxCodeName=xenial
;;
if this_dtype == tf.uint8:
input_values.append(
np.random.randint(0, 255, this_shape).astype(np.uint8))
+ if this_dtype == tf.int8:
+ input_values.append(
+ np.random.randint(-127, 127, this_shape).astype(np.int8))
elif this_dtype == tf.float32:
input_values.append(
np.random.random_sample(this_shape).astype(np.float32))
if this_dtype == np.uint8:
input_values.append(
np.random.randint(0, 255, this_shape).astype(np.uint8))
+ if this_dtype == np.int8:
+ input_values.append(
+ np.random.randint(-127, 127, this_shape).astype(np.int8))
elif this_dtype == np.float32:
input_values.append(
np.random.random_sample(this_shape).astype(np.float32))
# dump input and output in h5
import h5py
- supported_dtypes = ("float32", "uint8", "bool", "int32", "int64")
+ supported_dtypes = ("float32", "uint8", "int8", "bool", "int32", "int64")
h5dtypes = {
"float32": ">f4",
"uint8": "u1",
+ "int8": "i1",
"bool": "u1",
"int32": "int32",
"int64": "int64"
--- /dev/null
+This folder contains the necessary scripts to perform a pareto front estimation for machine learning models. Currently, the scripts support target devices running on Tizen, as well as `Odroid-XU4`.
+
+The contents of the folder can be categorized into the following groups:
+
+- [Generator scripts to map decision variables to `nnpackage_run` parameters](#mapping-decision-to-parameters)
+- [Estimator scripts to compute pareto front](#pareto-estimation)
+
+The following subsections describe the role of each script in detail.
+
+## Mapping Decision to Parameters
+The generator script `gen_oplist.py` is located under the `generator` folder and encodes `nnpackage` backend assignments as large integers. Effectively, it maps suitable backend assignments to integer values. For example, a graph with only three operations and two backends will have an integer representation in the range `(0, 7)`. Thus, a value `0` might imply that all operations run on the `cpu` backend, while `7` might imply that all operations run on the `acl_cl` backend. As will be described below, the integer representation of `nnpackage` parameters serves as a convenient decision space for pareto estimation.
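+
+To make the encoding concrete, the following minimal sketch decodes such an integer into a per-operation backend assignment. The operation list and backend names here are illustrative placeholders, not values read from a real model:
+
+```
+# Illustration only: decode an assignment ID into per-operation backends.
+oplist = ["Conv2D", "DepthwiseConv2D", "Pool2D"]  # 3 operations (placeholder)
+backends = ["cpu", "acl_cl"]                      # 2 backends -> IDs in (0, 7)
+
+def decode(assignment_id):
+    mapping = {}
+    for op in oplist:
+        mapping[op] = backends[assignment_id % len(backends)]
+        assignment_id //= len(backends)
+    return mapping
+
+print(decode(0))  # every operation on cpu
+print(decode(7))  # every operation on acl_cl
+```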
+
+Setting up parameters for `nnpackage_run` requires knowledge of model-specific operations. To this end, the `gen_oplist.py` script generates, for each model, an `oplist` of unique operations. If an exhaustive mapping of backends to operation sequences is preferred, then `gen_oplist.py` also generates a so-called `opmap` list of uniquely observed `<operation name, data size>` pairs.
+
+`gen_oplist.py` is run on the development environment (read: *Desktop PC*) as shown below:
+```
+python3 gen_oplist.py <tflite model> <target>
+```
+
+The list of model operations and their mapping to graph node indexes are stored in an *oplist.json* file and transferred to the target device. For further details about usage, type `python3 gen_oplist.py --help`.
+
+## Pareto Estimation
+Scripts under the `estimator` folder fall into two categories, namely [exhaustive, brute-force profiling](#exhaustive-profiling) and an [on-device version of pareto estimation](#on-device-pareto-estimation). These are described in detail below.
+
+### Exhaustive Profiling
+For the sake of testing several pareto estimation algorithms *offline* on common lookup data, the `estimator` folder includes `brute_force_profiler.py`, which records all solutions in the decision *or* assignment space. `brute_force_profiler.py` is typically run on the target device, with the following syntax:
+
+```
+python brute_force_profiler.py <model> <run_folder> [--mode=<mode>] [--dumpfile=<filename>]
+```
+For details, type `python brute_force_profiler.py --help`. Below is an example of the dump generated by the brute-force profiler:
+
+```
+{"oplist": ["Pool2D", "BinaryArithmetic", "DepthwiseConv2D", "Conv2D", "Reshape"],
+ "solutions": [
+ {"memory": 56388, "id": 0, "time": 72.525},
+ {"memory": 63624, "id": 1, "time": 86.532},
+ {"memory": 64320, "id": 2, "time": 69.352},
+ {"memory": 65376, "id": 3, "time": 76.436},
+ {"memory": 73016, "id": 4, "time": 69.634},
+ {"memory": 73492, "id": 5, "time": 47.013},
+ {"memory": 74488, "id": 6, "time": 95.01},
+ {"memory": 74844, "id": 7, "time": 111.329},
+ {"memory": 393324, "id": 8, "time": 98.956},
+ {"memory": 395088, "id": 9, "time": 103.24},
+ {"memory": 396180, "id": 10, "time": 68.107},
+ {"memory": 395932, "id": 11, "time": 86.109},
+ {"memory": 402468, "id": 12, "time": 25.477},
+ {"memory": 402800, "id": 13, "time": 25.42},
+ {"memory": 403904, "id": 14, "time": 9.168},
+ {"memory": 404476, "id": 15, "time": 7.801},
+....
+ {"memory": 403940, "id": 30, "time": 9.145},
+ {"memory": 403568, "id": 31, "time": 8.034}]}
+```
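+
+Such a dump also makes offline sanity checks easy. As a purely illustrative sketch (not one of the estimator scripts), the non-dominated `(time, memory)` points can be extracted from a dump like the one above as follows; the file path is only an example:
+
+```
+# Illustration only: extract the pareto front from a brute-force dump.
+import json
+
+with open("/tmp/brute_force_dump.json") as f:  # example path, not a fixed location
+    solutions = json.load(f)["solutions"]
+
+# Keep only the points that no other solution strictly beats on both axes
+pareto = [s for s in solutions
+          if not any(o["time"] < s["time"] and o["memory"] < s["memory"]
+                     for o in solutions)]
+print(sorted(pareto, key=lambda s: s["time"]))
+```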
+
+**Note**: At present, the pareto estimation algorithms run on-device; an *offline* mode will be supported in the near future.
+
+### On Device Pareto Estimation
+Currently, the `estimator` folder includes only `random_sampler.py`; in the future, it will feature a set of pareto estimation algorithms. Regardless of the algorithm, the following steps must be carried out in sequence:
+
+1. Generate the oplist using `gen_oplist.py`, and transfer the JSON file to the target device. This step is performed on the development environment.
+
+2. Copy the contents of the `estimator` folder to the target (*scp* for odroid, *sdb push* for tizen), at a preferred location.
+
+3. On the target device, run the pareto-estimation algorithm. The following example shows how to run `random_sampler.py` (see `python random_sampler.py --help` for details):
+```
+python random_sampler.py /root/img_model/mobilenetv2/ /opt/usr/nnfw-test/Product/out/bin --mode=name --dumpfile=/tmp/mobilenetv2_opname_profile.json --iterations=20
+```
+After profiling, the results can be viewed under the filename provided by the `--dumpfile` argument. Below is an illustrative example of the same model that was brute-forced above:
+
+```
+{"configs": {
+ "4": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=cpu ",
+ "10": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
+ "14": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
+ "16": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "20": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "21": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "31": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=acl_cl "},
+ "oplist": ["Pool2D", "DepthwiseConv2D", "Reshape", "Conv2D", "BinaryArithmetic"],
+ "solutions": [
+ {"exec_time": 76.138, "max_rss": 62712, "id": 4},
+ {"exec_time": 72.719, "max_rss": 65272, "id": 16},
+ {"exec_time": 22.409, "max_rss": 403120, "id": 14},
+ {"exec_time": 28.138, "max_rss": 403064, "id": 10},
+ {"exec_time": 70.656, "max_rss": 65536, "id": 20},
+ {"exec_time": 68.805, "max_rss": 66076, "id": 21},
+ {"exec_time": 8.201, "max_rss": 404656, "id": 31}], "mode": "name"}
+```
+**Note**: The pareto-estimation algorithms require the Python `numpy` package, so make sure to install it beforehand.
+
+
+
+
--- /dev/null
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import sys
+import Queue
+import utils
+import signal
+from pareto import ParetoData
+
+
+class Hlps:
+ """
+ Initialize Runner and Pareto data structure
+ """
+
+ def __init__(self, runner, num_backends, num_samples):
+ self._runner = runner
+ self._num_backends = num_backends
+ self._num_samples = num_samples
+ self._marked = {}
+ self._extended_search = False
+ self._iteration = 0
+ self._pareto_obj = ParetoData()
+
+ """
+ Method to generate new samples from a given sample v_vec.
+ The new samples bear a hamming distance hd from the provided sample.
+ """
+
+ def gen_hamming(self, v_vec, hd=1, nsamples=None):
+ if nsamples is None:
+ nsamples = self._num_backends - 1
+ ret = np.zeros((nsamples, len(v_vec)), dtype=int)
+ v = v_vec
+ marked = np.full(len(v), False, dtype=bool)
+ cnt = 0
+
+ for r in range(nsamples):
+ ret[r] = v
+ rnd_pos = np.random.permutation(range(len(v)))
+ for i in range(hd):
+ pos = rnd_pos[i]
+ marked[pos] = True
+ for r in range(nsamples):
+ ret[r][pos] = (v[pos] - r - 1) % self._num_backends
+
+ return ret
+
+ """
+ Method to generate all samples from a given sample v_vec, that
+ have a hamming distance of one with respect to it.
+ """
+
+ def gen_hamming_one(self, v_vec, invert=False):
+ ret = np.zeros(((self._num_backends - 1) * len(v_vec), len(v_vec)), dtype=int)
+ if invert == False:
+ v = v_vec
+ else:
+ v = [1 - x for x in v_vec]
+ for nb in range(1, self._num_backends):
+ c = 0
+ for r in range((nb - 1) * len(v), nb * len(v)):
+ ret[r] = v
+ ret[r][c] = (v[c] - nb) % self._num_backends
+ c += 1
+ return ret
+
+ """
+ Enable profiling over extended search space
+ """
+
+ def enable_extended_search(self):
+ self._extended_search = True
+ for key in self._pareto_obj.get_pareto_keys():
+ config = self._pareto_obj.get_config(key)
+ extended_val = self._runner.get_extended_solution(config)
+ self._pareto_obj.set_config(key, extended_val)
+ self._iteration = 0
+
+ """
+ HLPS algorithm implementation provided here.
+ Description: Starting with a random sample, fill up a sampling
+ queue with hamming neighbors. Fetch samples from queue,
+ each time checking for pareto optimality. Pareto-optimal samples
+ are then explored/exploited to generate new samples that are added to the queue.
+ Algorithm phase terminates when the queue is empty.
+    Repeat this phase in a multi-shot invocation for better results.
+ """
+
+ def hlps_routine(self, config_ids):
+ # Initialize
+ solution_q = Queue.Queue()
+ visited = {}
+ nbits = self._runner.get_nbits(self._extended_search)
+ is_extended = self._runner.get_mode_extended()
+ nsolutions = self._num_backends**nbits
+
+ stop_insert = False
+
+ cnt = 0
+ q_add_cnt = 0
+ round_cnt = 0
+
+ def extended_solution(s):
+ return self._runner.get_extended_solution(s)
+
+ def mark_solution(s):
+ if is_extended == True and self._extended_search == False:
+ self._marked[extended_solution(s)] = True
+ else:
+ self._marked[s] = True
+
+ def is_marked(s):
+ if is_extended == True and self._extended_search == False:
+ return (extended_solution(s) in self._marked)
+ else:
+ return (s in self._marked)
+
+ def visit_solution(s):
+ if is_extended == True and self._extended_search == False:
+ visited[extended_solution(s)] = True
+ else:
+ visited[s] = True
+
+ def is_visited(s):
+ if is_extended == True and self._extended_search == False:
+ return (extended_solution(s) in visited)
+ else:
+ return (s in visited)
+
+ def sigint_handler(signum, frame):
+ print("Round cnt = ", round_cnt)
+
+ signal.signal(signal.SIGINT, sigint_handler)
+ if len(config_ids) > 0:
+ for solution in config_ids:
+ if is_extended == True and self._extended_search == True and self._iteration == 0:
+ s = extended_solution(solution)
+ else:
+ s = solution
+ s_vec = utils.int_to_vec(s, self._num_backends, nbits)
+
+ candidate = self.gen_hamming_one(s_vec)
+ for hd in range((self._num_backends - 1) * nbits):
+ candidate_int = int(''.join(str(x) for x in reversed(candidate[hd])),
+ self._num_backends)
+ if is_marked(candidate_int) == False:
+ solution_q.put(candidate_int)
+ mark_solution(candidate_int)
+ q_add_cnt += 1
+ else:
+ start_seed = int(np.random.rand() * (nsolutions))
+ solution_q.put(start_seed)
+ q_add_cnt += 1
+
+ self._iteration += 1
+ # Main routine
+ while not solution_q.empty():
+ s = solution_q.get()
+ mark_solution(s)
+ stop_insert = False
+ if (round_cnt % 100 == 0):
+ print("sample count = ", round_cnt)
+ if self._extended_search == True:
+ print("Queue size is ", solution_q.qsize())
+
+ if is_extended == True and self._extended_search == False:
+ time_val, memory_val = self._runner.profile_by_opname(s)
+ elif is_extended == True:
+ time_val, memory_val = self._runner.profile_by_opindex(s)
+ else:
+ time_val, memory_val = self._runner.profile_by_opname(s)
+ round_cnt += 1
+
+ utils.progressbar(round_cnt, nsolutions, prefix="% samples computed. : ")
+ self._pareto_obj.update_pareto_solutions(
+ s, time_val, memory_val, explore_flag=True)
+
+ for key in self._pareto_obj.get_pareto_keys():
+ pareto_sample = self._pareto_obj.get_config(key)
+ explore_sample = self._pareto_obj.get_exploration(key)
+
+ if is_visited(pareto_sample):
+ continue
+ visit_solution(pareto_sample)
+ s_vec = utils.int_to_vec(pareto_sample, self._num_backends, nbits)
+
+ if explore_sample == True:
+ # Explore solutions over a larger range
+ for hd in range(1, nbits + 1):
+ if stop_insert is True:
+ break
+
+ candidate = self.gen_hamming(s_vec, hd=hd)
+ for i in range(self._num_backends - 1):
+ if stop_insert is True:
+ break
+ candidate_int = int(
+ ''.join(str(x) for x in reversed(candidate[i])),
+ self._num_backends)
+ try:
+ if is_marked(candidate_int) == False:
+ solution_q.put(candidate_int)
+ q_add_cnt += 1
+ except IndexError:
+ print("candidate[i] = ", candidate[i],
+ ', candidate_int = ', candidate_int)
+ sys.exit(-1)
+ if (q_add_cnt >= self._num_samples):
+ print("Queue full in explore")
+ stop_insert = True
+ else:
+ # Exploit solutions within immediate neighborhood
+ candidate = self.gen_hamming_one(s_vec)
+
+ for j in range((self._num_backends - 1) * nbits):
+ if stop_insert is True:
+ break
+ candidate_int = int(
+ ''.join(str(x) for x in reversed(candidate[j])),
+ self._num_backends)
+ if is_marked(candidate_int) == False:
+ solution_q.put(candidate_int)
+ q_add_cnt += 1
+ if (q_add_cnt >= self._num_samples):
+ print("Queue full in exploit")
+ stop_insert = True
+ self._pareto_obj.set_exploration(key)
+
+ pfront = set([
+ self._pareto_obj.get_config(key)
+ for key in self._pareto_obj.get_pareto_keys()
+ ])
+ return pfront, q_add_cnt
+
+ """
+ Method to dump results from HLPS
+ """
+
+ def dump_results(self, dumpdata):
+ dumpdata = self._pareto_obj.dump_pareto_solutions(dumpdata)
+ dumpdata = self._runner.dump_config(dumpdata)
+ return dumpdata
--- /dev/null
+#! /usr/bin/python
+import argparse
+import json
+import sys
+from profile_args import ProfileArgs
+from runner import Runner
+from utils import progressbar
+
+if __name__ == "__main__":
+ parser = ProfileArgs(
+ prog="brute_force_profiler.py", description="Profiles nnpackage_run using oplist")
+ # Parse arguments
+ args = parser.parse_args()
+ modelfile = args.model
+ mode = args.mode
+ n_backends = args.backends
+ dumpfile = args.dumpfile
+
+ # Initialize a runner for given model and target
+ runner = Runner(args.model, args.run_folder, args.backends, args.mode)
+ nruns = runner.get_solution_spacelen()
+ profile_results = {}
+ profile_results['solutions'] = []
+ chk_ptr = 0
+
+ # Profile each backend setting, record execution time and peak memory
+ for r in range(nruns):
+ if (r % 100) == 0:
+ # Checkpointing results, in case the runs take too long
+ if chk_ptr > 0:
+ with open("/tmp/solutions.json") as ifile:
+ tmp_results = json.load(ifile)
+
+ with open("/tmp/solutions.json", "w") as ofile:
+ json.dump(tmp_results + profile_results['solutions'][chk_ptr:], ofile)
+ else:
+ with open("/tmp/solutions.json", "w") as ofile:
+ json.dump(profile_results['solutions'], ofile)
+ chk_ptr = r
+
+ if args.mode == "name":
+ exec_time, max_rss = runner.profile_by_opname(r)
+ elif args.mode == "index":
+ exec_time, max_rss = runner.profile_by_opindex(r)
+ else:
+ print("Invalid mode ", mode)
+ sys.exit(-1)
+
+ profile_results['solutions'].append({
+ "time": exec_time,
+ "memory": max_rss,
+ "id": r
+ })
+ progressbar(r, nruns, prefix="% samples computed. : ")
+ progressbar(nruns, nruns, prefix="% samples computed. : ")
+
+ oplist, opmap, opname_by_indx = runner.get_opconfig()
+
+ if args.mode == "index":
+ profile_results['oplist'] = oplist
+ profile_results['opmap'] = opmap
+ profile_results['opname_by_indx'] = opname_by_indx
+ elif args.mode == "name":
+ profile_results['oplist'] = oplist
+ else:
+ print("Invalid mode ", mode)
+ sys.exit(-1)
+
+ with open(dumpfile, "w") as ofile:
+ json.dump(profile_results, ofile)
+    print("\nDone..")
--- /dev/null
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import utils
+import sys
+import json
+import time
+from Hlps import Hlps
+from profile_args import ProfileArgs
+from runner import Runner
+
+
+def hlps_profiler(modelfile,
+ run_folder,
+ num_backends=2,
+ mode="name",
+ nruns=3,
+ num_samples=2000,
+ dumpfile=None):
+ runner = Runner(modelfile, run_folder, num_backends, mode=mode)
+ hlps = Hlps(runner, num_backends=num_backends, num_samples=num_samples)
+
+ config_set = set()
+ sample_cnt = 0
+ total_reject_list = []
+
+ for r in range(nruns):
+ config_set, sample_cnt_iter = hlps.hlps_routine(config_set)
+ sample_cnt += sample_cnt_iter
+
+    # Extended search over the op-index space applies only in "index" mode
+    if mode == "index":
+        print("Starting search over extended space")
+        print("\n")
+        hlps.enable_extended_search()
+        for r in range(nruns):
+            config_set, sample_cnt_iter = hlps.hlps_routine(config_set)
+            sample_cnt += sample_cnt_iter
+
+ # Export results to json file
+ # Dump profiler results
+ dumpdata = {}
+    dumpdata['mode'] = mode
+ dumpdata['sample_cnt'] = sample_cnt
+ dumpdata = hlps.dump_results(dumpdata)
+ with open(dumpfile, "w") as ofile:
+ json.dump(dumpdata, ofile)
+
+
+if __name__ == "__main__":
+ t_start = time.time()
+ parser = ProfileArgs(
+ "hlps_on_device.py",
+ description="On-Device Optimizing Profiler for TensorFlowLite Models")
+ parser.add_argument(
+ '--iterations',
+ type=int,
+ default=3,
+ help='Number of iterations, less than 10 should be enough')
+ parser.add_argument(
+ '--samples', type=int, default=2000, help='Number of samples per iteration')
+ parser.add_argument(
+ '--offline',
+ type=bool,
+ default=False,
+ help='Set to True for running over profiled data')
+ parser.add_argument('--profiled_data', type=str, help='Profile file with path')
+
+ args = parser.parse_args()
+
+ hlps_profiler(
+ args.model,
+ args.run_folder,
+ num_backends=args.backends,
+ mode=args.mode,
+ nruns=args.iterations,
+ num_samples=args.samples,
+ dumpfile=args.dumpfile)
+ t_end = time.time()
+ with open(args.dumpfile, "r") as ifile:
+ dumpdata = json.load(ifile)
+ dumpdata['profiling time'] = (t_end - t_start)
+ with open(args.dumpfile, "w") as ofile:
+ json.dump(dumpdata, ofile)
+ print("done.., profiling time = ", (t_end - t_start), " seconds")
--- /dev/null
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class ParetoData:
+ def __init__(self):
+ self._pareto_solutions = {}
+ self._configs = {}
+ self._cnt = 0
+ self._explore = {}
+
+ def add_pareto_entry(self,
+ sample,
+ exec_time,
+ max_rss,
+ key,
+ explore_flag,
+ check_one_hop=True):
+ self._pareto_solutions[key] = [exec_time, max_rss]
+ self._configs[key] = sample
+ if explore_flag == True and check_one_hop == True:
+ self._explore[key] = False
+ elif explore_flag == True and check_one_hop == False:
+ self._explore[key] = True
+
+ def update_pareto_solutions(self, sample, exec_time, max_rss, explore_flag=False):
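+        # Keep a new sample only if no stored entry strictly dominates it
+        # (strictly lower exec_time and strictly lower max_rss); any stored
+        # entry that the new sample strictly dominates is overwritten in place.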
+ new_item = True
+ if self._pareto_solutions:
+ for key in list(self._pareto_solutions):
+ if self._pareto_solutions[key][0] < exec_time and self._pareto_solutions[key][1] < max_rss:
+ new_item = False
+ break
+ elif self._pareto_solutions[key][0] > exec_time and self._pareto_solutions[key][1] > max_rss:
+ self.add_pareto_entry(sample, exec_time, max_rss, key, explore_flag,
+ True)
+ new_item = False
+
+ if new_item is True:
+ self.add_pareto_entry(sample, exec_time, max_rss, self._cnt, explore_flag,
+ False)
+ self._cnt += 1
+
+ def dump_pareto_solutions(self, dumpdata):
+ marked = {}
+ pareto_results = []
+ for i in range(self._cnt):
+ if self._configs[i] not in marked:
+ marked[self._configs[i]] = True
+ pareto_results.append({
+ "id": self._configs[i],
+ "exec_time": self._pareto_solutions[i][0],
+ "max_rss": self._pareto_solutions[i][1]
+ })
+ dumpdata.update({"solutions": pareto_results})
+
+ return dumpdata
+
+ def get_pareto_keys(self):
+ return self._configs.keys()
+
+ def get_config(self, key):
+ return self._configs[key]
+
+ def get_exploration(self, key):
+ return self._explore[key]
+
+ def set_exploration(self, key):
+ self._explore[key] = True
+
+ def set_config(self, key, extended_value):
+ self._configs[key] = extended_value
--- /dev/null
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+
+class ProfileArgs(argparse.ArgumentParser):
+ def __init__(self, *args, **kwargs):
+        super(ProfileArgs, self).__init__(*args, **kwargs)
+ self.add_argument(
+ 'model', type=str, default=None, help='nnpackage name with path')
+ self.add_argument('run_folder', type=str, help="path to nnpackage_run executable")
+ self.add_argument(
+ '--mode',
+ type=str.lower,
+ choices=["index", "name"],
+ default="name",
+ help='Profile by operation index or name')
+ self.add_argument('--backends', type=int, default=2, help='Number of backends')
+ self.add_argument(
+ '--dumpfile',
+ type=str.lower,
+ default="/tmp/final_result.json",
+ help='JSON Dumpfile name with path')
--- /dev/null
+#! /usr/bin/python
+import argparse
+import json
+import numpy as np
+import sys
+import subprocess
+import time
+from pareto import ParetoData
+from profile_args import ProfileArgs
+from runner import Runner
+from utils import progressbar
+
+if __name__ == "__main__":
+ t_start = time.time()
+ parser = ProfileArgs("random_sampler.py", description="Random sampler")
+ parser.add_argument(
+ '--iterations', type=int, default=100, help='Number of iterations')
+
+ # Parse arguments
+ args = parser.parse_args()
+ dumpfile = args.dumpfile
+ iterations = args.iterations
+
+ # Initialize a runner and Pareto data structure obj
+ runner = Runner(args.model, args.run_folder, args.backends, args.mode)
+ pareto_obj = ParetoData()
+ # Initialize variables for random sampler
+ n_assignments = runner.get_solution_spacelen()
+ n_iterations = min(iterations, n_assignments)
+ chk_ptr = 0
+ marked_samples = {}
+
+ # Profile at random over solution space
+ for r in range(n_iterations):
+ random_sample = int(np.random.rand() * n_assignments)
+ while random_sample in marked_samples:
+ random_sample = int(np.random.rand() * n_assignments)
+ marked_samples[random_sample] = True
+ if args.mode == "name":
+ exec_time, max_rss = runner.profile_by_opname(random_sample)
+ elif args.mode == "index":
+ exec_time, max_rss = runner.profile_by_opindex(random_sample)
+ else:
+ print("Invalid mode ", mode)
+ sys.exit(-1)
+
+ pareto_obj.update_pareto_solutions(random_sample, exec_time, max_rss)
+ progressbar(r, n_assignments, prefix="% samples computed. : ")
+ progressbar(r + 1, n_assignments, prefix="% samples computed. : ")
+
+ # Dump profiler results
+ dumpdata = {}
+ dumpdata['mode'] = args.mode
+ dumpdata = pareto_obj.dump_pareto_solutions(dumpdata)
+ dumpdata = runner.dump_config(dumpdata)
+ with open(dumpfile, "w") as ofile:
+ json.dump(dumpdata, ofile)
+ t_end = time.time()
+ print("\n")
+ print("done.., profiling time = ", (t_end - t_start), " seconds")
--- /dev/null
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import numpy as np
+from utils import fetch_config_by_name
+from utils import fetch_config_by_indx
+from utils import generate_vars
+from utils import generate_vars_for_indx
+from utils import exec_shell
+from utils import import_configs
+from utils import int_to_vec
+import sys
+
+
+class Mapper:
+ def __init__(self, opmap, oplist, opname_by_index):
+ self._opmap = opmap
+ self._oplist = oplist
+ self._opname_by_indx = opname_by_index
+
+ def get_oplist(self):
+ return self._oplist
+
+ def get_opmap(self):
+ return self._opmap
+
+ def get_opname_by_indx(self):
+ return self._opname_by_indx
+
+ def get_indices(self, value):
+ indx_list = []
+ for i in range(len(self._opname_by_indx)):
+ if self._opname_by_indx[i] == value:
+ indx_list.append(i)
+ return indx_list
+
+ def map_to_extended_space(self, n, backends):
+ n_vec = int_to_vec(n, backends, len(self._oplist))
+ extended_vec = np.zeros(max(self._opmap) + 1, dtype=int)
+ cnt = 0
+
+ for allocation in n_vec:
+ extended_pos = list(
+ set([self._opmap[i] for i in self.get_indices(self._oplist[cnt])]))
+ try:
+ extended_vec[extended_pos] = allocation
+ except IndexError:
+ print("extended_vec size = ", extended_vec.size, ", extended_pos = ",
+ extended_pos)
+ cnt += 1
+        extended_n = int(''.join(str(i) for i in extended_vec[::-1]), backends)
+ return extended_n
+
+
+class Runner:
+ def __init__(self, model, run_folder, num_backends, mode):
+ self._model = model
+ self._run_folder = run_folder
+ self._mode = mode
+ oplist, opmap, opname_by_index = import_configs(mode)
+ self._mapper = Mapper(opmap, oplist, opname_by_index)
+ self._nbackends = num_backends
+ self._extended_map = {}
+
+ def get_solution_spacelen(self):
+ if self._mode == "name":
+ return self._nbackends**len(self._mapper.get_oplist())
+ elif self._mode == "index":
+ return self._nbackends**max(self._mapper.get_opmap())
+ else:
+ print("Unknown mode ", mode, ", exiting profiler")
+ sys.exit(-1)
+
+ def get_nbits(self, extended_search_mode):
+ if self._mode == "index" and extended_search_mode == True:
+ return max(self._mapper.get_opmap())
+ else:
+ return len(self._mapper.get_oplist())
+
+ def get_mode_extended(self):
+ return (self._mode == "index")
+
+ def get_extended_solution(self, s):
+ if s in self._extended_map:
+ return self._extended_map[s]
+
+ extended_value = self._mapper.map_to_extended_space(s, self._nbackends)
+ self._extended_map[s] = extended_value
+ return extended_value
+
+ def run_inference(self, solution):
+ cmd_str = [
+ ". /tmp/envvars.sh && " + self._run_folder + "/nnpackage_run -w1 -r1 -m1 -l "
+ + self._model + "/metadata/tc/input.h5 " + self._model + " 2> /dev/null"
+ ]
+ res = exec_shell(cmd_str, newline_split=True)
+ try:
+ exec_time = float(res[4].split(' ')[-2])
+ max_rss = int(res[13].split(' ')[-2])
+ except IndexError:
+ print("got index error at config ", solution)
+ print("result: ", res)
+ print("####")
+ sys.exit(-1)
+ return (exec_time, max_rss)
+
+ def profile_by_opname(self, solution):
+ generate_vars(self._mapper.get_oplist(), solution, self._nbackends)
+ return self.run_inference(solution)
+
+ def profile_by_opindex(self, solution):
+ generate_vars_for_indx(self._mapper.get_opmap(), solution, self._nbackends)
+ return self.run_inference(solution)
+
+ def get_opconfig(self):
+ return self._mapper.get_oplist(), self._mapper.get_opmap(
+ ), self._mapper.get_opname_by_indx()
+
+ def dump_config(self, dumpdata):
+ if self._mode == "name":
+ dumpdata.update({'oplist': self._mapper.get_oplist()})
+ elif self._mode == "index":
+ dumpdata.update({'oplist': self._mapper.get_opmap()})
+
+ configs = {}
+ for solution in dumpdata['solutions']:
+ if self._mode == "name":
+ configs[int(solution["id"])] = fetch_config_by_name(
+ dumpdata['oplist'], solution["id"], self._nbackends)
+ elif self._mode == "index":
+ configs[int(solution["id"])] = fetch_config_by_indx(
+ dumpdata['oplist'], solution["id"], self._nbackends)
+ dumpdata.update({'configs': configs})
+ return dumpdata
--- /dev/null
+#! /usr/bin/python
+import subprocess
+import numpy as np
+import sys
+import os
+import json
+"""
+ General executor for bash-like shell. Supports multiline results.
+"""
+
+
+def exec_shell(command_str, newline_split=False):
+ result = subprocess.Popen(command_str, shell=True, stdout=subprocess.PIPE)
+ out, err = result.communicate()
+ if (newline_split):
+ res = out.decode("utf-8").split('\n')
+ res = res[:-1]
+ return res
+ else:
+ return out.decode("utf-8").split("\n")[0]
+
+
+"""
+ Given a number and its base, return its symbol-wise vector representation
+"""
+
+
+def int_to_vec(n, b, n_operations):
+ number_arr = np.zeros(n_operations, dtype=int)
+ i = n_operations - 1
+ while (n != 0):
+ number_arr[i] = n % b
+ n = n // b
+ i -= 1
+
+ return number_arr[::-1]
+
+
+"""
+    Generate onert backend mapping for each graph node, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique <operation name, data size> id
+ that was generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a very long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def generate_vars_for_indx(oplist, number, base_value):
+ ofile = open('/tmp/envvars.sh', 'w')
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+
+ if (base_value == 2):
+ ofile.write("export BACKENDS=\"acl_cl;cpu\"")
+ elif (base_value == 3):
+ ofile.write("export BACKENDS=\"acl_cl;acl_neon;cpu\"")
+ ofile.write("\n")
+ number_arr = int_to_vec(number, base_value, len(oplist))
+ cnt = 0
+ op_backend_map_str = "export OP_BACKEND_MAP=\""
+ for cnt in range(len(oplist)):
+ backend_str = backend_map[int(number_arr[oplist[cnt]])]
+ op_backend_map_str += ''.join([str(cnt), backend_str])
+
+ if (cnt < (len(oplist) - 1)):
+ op_backend_map_str += ";"
+ else:
+ op_backend_map_str += "\""
+ ofile.write(op_backend_map_str)
+ ofile.write("\n")
+ ofile.close()
+
+
+"""
+    Print onert backend mapping for each graph node, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique <operation name, data size> id
+ that was generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a very long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def fetch_config_by_indx(oplist, number, base_value):
+ var_str = ""
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+
+ if (base_value == 2):
+ var_str += "BACKENDS=\"acl_cl;cpu\""
+ elif (base_value == 3):
+ var_str += "BACKENDS=\"acl_cl;acl_neon;cpu\""
+ var_str += " "
+ number_arr = int_to_vec(number, base_value, len(oplist))
+ cnt = 0
+ var_str += "OP_BACKEND_MAP=\""
+ op_backend_map_str = ""
+ for cnt in range(len(oplist)):
+ backend_str = backend_map[int(number_arr[oplist[cnt]])]
+ op_backend_map_str += ''.join([str(cnt), backend_str])
+
+ if (cnt < (len(oplist) - 1)):
+ op_backend_map_str += ";"
+ else:
+ op_backend_map_str += "\""
+ var_str += op_backend_map_str
+ return var_str
+
+
+"""
+    Generate onert backend mapping for each graph operation name, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique operation name.
+ The list is generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def generate_vars(oplist, number, base_value):
+ ofile = open('/tmp/envvars.sh', 'w')
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+ if (base_value == 2):
+ ofile.write("export BACKENDS=\"acl_cl;cpu\"")
+ elif (base_value == 3):
+ ofile.write("export BACKENDS=\"acl_cl;acl_neon;cpu\"")
+ ofile.write("\n")
+ number_str = int_to_vec(number, base_value, len(oplist))
+
+ cnt = 0
+ for n in number_str:
+ op_backend_map_str = ''.join(
+ ["export OP_BACKEND_", oplist[cnt], backend_map[int(n)]])
+ ofile.write(op_backend_map_str)
+ ofile.write("\n")
+ cnt += 1
+ ofile.close()
+
+
+"""
+    Print onert backend mapping for each graph operation name, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique operation name.
+ The list is generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def fetch_config_by_name(oplist, number, base_value):
+ var_str = ""
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+ if (base_value == 2):
+ var_str += "BACKENDS=\"acl_cl;cpu\""
+ elif (base_value == 3):
+ var_str += "BACKENDS=\"acl_cl;acl_neon;cpu\""
+ var_str += " "
+
+ number_str = int_to_vec(number, base_value, len(oplist))
+
+ cnt = 0
+ for n in number_str:
+ var_str += ''.join(["OP_BACKEND_", oplist[cnt], backend_map[int(n)]])
+ var_str += " "
+ cnt += 1
+ return var_str
+
+
+"""
+    Import the operation list, map, and relevant information for profiling. Note: This information should have been
+ dumped under /tmp/oplist.json using the gen_oplist.py script.
+"""
+
+
+def import_configs(mode):
+ if not os.path.isfile('/tmp/oplist.json'):
+ print("No oplist")
+ sys.exit(-1)
+ with open('/tmp/oplist.json', 'r') as ifile:
+ data = json.load(ifile)
+ oplist = data['oplist']
+ if mode == "name":
+ nbits = len(oplist)
+ return oplist, None, None
+ elif mode == "index":
+ opmap = data['opmap']
+ opname_by_indx = data['opname_by_indx']
+ return oplist, opmap, opname_by_indx
+
+ print("mode is incorrect")
+ sys.exit(-1)
+
+
+"""
+ Generic Progress bar display
+"""
+
+
+def progressbar(current_cnt, max_cnt, prefix="", file=sys.stdout):
+ x = int(current_cnt * 100.0 / max_cnt)
+ file.write("%s[%s%s] %i/%i\r" % (prefix, "#" * x, "." * (100 - x), x, 100))
+ file.flush()
--- /dev/null
+#! /usr/bin/python
+import argparse
+import tensorflow as tf
+import sys
+sys.path.append("../estimator")
+import subprocess
+import os
+import json
+from functools import reduce
+from utils import exec_shell
+"""
+    Generates, from a tflite model, a list of unique onert operation names used in the model.
+"""
+
+
+def generate_oplist_by_name(tflite_file):
+ with open("operations_map.json") as ifile:
+ data = json.load(ifile)
+ op_dict = data['op_dict']
+
+ intr = tf.lite.Interpreter(tflite_file)
+ intr.allocate_tensors()
+ tf_opset = set(op['op_name'] for op in intr._get_ops_details())
+ try:
+ onert_ops = set([op_dict[op] for op in tf_opset])
+ except KeyError:
+ print("Invalid mapping, check your tensorflow ops for new/unknown mappings: ",
+ tf_opset)
+ sys.exit(-1)
+ return onert_ops
+
+
+"""
+ Returns the total data size for the model graph node (inputs + outputs)
+ Params:
+ op: operation instance (obtained from _get_ops_details())
+ tsr: tensor instance (obtained from get_tensor_details())
+"""
+
+
+def get_op_data_size(op, tsr):
+ data_size = 0
+ for idx in op['inputs']:
+ if tsr[idx]['shape'].size > 0:
+ data_size += reduce(lambda x, y: x * y,
+ tsr[idx]['shape']) * tsr[idx]['shape'].dtype.itemsize
+
+ for idx in op['outputs']:
+ if tsr[idx]['shape'].size > 0:
+ data_size += reduce(lambda x, y: x * y,
+ tsr[idx]['shape']) * tsr[idx]['shape'].dtype.itemsize
+ return data_size
+
+
+"""
+    Generates, from a tflite model, the following outputs:
+ 1. opmap - a symbol/bit index mapping from every graph operation to a unique <operation name, data size> index identifier. This mapping
+ will be used later when profiling the model at runtime.
+
+ 2. oplist - a list of unique onert operation names used in the model
+
+ 3. opname_by_index - a list of onert operation names, indexed by their topological order in the model
+"""
+
+
+def generate_oplist_by_name_size(tflite_file):
+ intr = tf.lite.Interpreter(tflite_file)
+ intr.allocate_tensors()
+ ops = intr._get_ops_details()
+ tsr = intr.get_tensor_details()
+
+ opset = set()
+ oplist = set()
+ indx = []
+ opname_by_indx = []
+ # Fetch tensorflow operation mapping to onert kernels
+ with open("operations_map.json") as ifile:
+ data = json.load(ifile)
+ op_dict = data['op_dict']
+
+ # Fetch all unique operation names and <operation name, tensordata size> pairs
+ for op in ops:
+ opset.add((op['op_name'], get_op_data_size(op, tsr)))
+ oplist.add(op_dict[op['op_name']])
+ indx.append(op['index'])
+ opname_by_indx = [op_dict[ops[i]['op_name']] for i in indx]
+
+ # Create a 'm' bit/symbol map indexed by <opname, tensordata size> values
+ inv_opset_map = {}
+ i = 0
+ for op in opset:
+ inv_opset_map[op] = i
+ i += 1
+
+ # Map 'n' operation symbol space to 'm' <opname, tensordata size> space
+ op_map = []
+ for op in ops:
+ data_size = get_op_data_size(op, tsr)
+ op_map.append(inv_opset_map[(op['op_name'], data_size)])
+
+ return op_map, oplist, opname_by_indx
+
+
+"""
+Script to generate oplist, given the following details:
+1. Modelfile
+2. target device type
+3. Additional information, such as authentication for file transfer
+
+Info: python gen_oplist.py --help
+"""
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description='''gen_backend: Generates oplist and uploads to target''',
+ epilog="""Success.""")
+ parser.add_argument(
+ '--auth', type=str, default=None, help='authentication: <user@host>')
+ parser.add_argument(
+ '--mode',
+ type=str.lower,
+ choices=["index", "name"],
+ default="name",
+ help='Profile by operation index or name')
+ parser.add_argument('model', type=str, default=None, help='tflite name with path')
+ parser.add_argument(
+ 'target',
+ type=str.lower,
+ choices=['tizen', 'odroid'],
+ default="odroid",
+ help='target name')
+
+ # Parse arguments
+ args = parser.parse_args()
+ modelfile = args.model
+ target = args.target
+ mode = args.mode
+ if target == "odroid":
+ auth_str = args.auth
+ if auth_str is None:
+ print("Need valid authentication")
+ sys.exit(-1)
+
+ # Generate oplist
+ if mode == "name":
+ opset = generate_oplist_by_name(modelfile)
+ print(opset)
+ with open('/tmp/oplist.json', 'w') as opfile:
+ data = {}
+ data['oplist'] = list(opset)
+ json.dump(data, opfile)
+ elif mode == "index":
+ data = {}
+ opmap, oplist, opname_by_indx = generate_oplist_by_name_size(modelfile)
+ data['opmap'] = opmap
+ data['oplist'] = list(oplist)
+ data['opname_by_indx'] = opname_by_indx
+ with open('/tmp/oplist.json', 'w') as opfile:
+ json.dump(data, opfile)
+ # Upload oplist to target
+ if target == "tizen":
+ exec_shell("sdb push /tmp/oplist.json /tmp/oplist.json")
+ elif target == "odroid":
+ print("auth_str = ", auth_str)
+ exec_shell("scp /tmp/oplist.json " + auth_str + ":/tmp/oplist.json")
+ print("done...")
--- /dev/null
+{ "op_dict": {
+ "SUM":"Reduce",
+ "ADD":"BinaryArithmetic",
+ "SUB":"BinaryArithmetic",
+ "DIV":"BinaryArithmetic",
+ "MUL":"BinaryArithmetic",
+ "REDUCE_MAX": "Reduce",
+ "REDUCE_MIN": "Reduce",
+ "CONV_2D": "Conv2D",
+ "PACK":"Pack",
+ "SOFTMAX":"Softmax",
+ "CONCATENATION":"Concat",
+ "EXP":"ElementwiseUnary",
+ "RESHAPE":"Reshape",
+ "SPLIT_V":"SplitV",
+ "ARG_MAX": "ArgMax",
+ "BATCH_TO_SPACE_ND":"BatchToSpaceND",
+ "DEPTHWISE_CONV_2D":"DepthwiseConv2D",
+ "LOGISTIC":"ElementwiseActivation",
+ "MEAN":"Reduce",
+ "RELU6":"ElementwiseActivation",
+ "RELU":"ElementwiseActivation",
+ "RESIZE_BILINEAR":"ResizeBilinear",
+ "REVERSE_V2":"Reverse",
+ "SPACE_TO_BATCH_ND":"SpaceToBatchND",
+ "AVERAGE_POOL_2D": "Pool2D",
+ "MAX_POOL_2D": "Pool2D",
+ "GATHER": "Gather",
+ "CAST": "ElementwiseUnary",
+ "FULLY_CONNECTED": "FullyConnected",
+ "PAD": "Pad",
+ "SLICE" : "Slice",
+ "STRIDED_SLICE": "StridedSlice",
+ "TRANSPOSE": "Transpose",
+ "UNPACK": "Unpack"
+}}
--- /dev/null
+# Stab - Static Backend Scheduler
+
+`Stab` is a tool to schedule a backend for each operation using profiled data
+
+An nnpackage with backend configuration will be created at `./tools/stab/nnpkg_sched`
+
+Supported backends: `cpu`, `ruy`, and `xnnpack`
+- Other backends will be supported once `stab` can measure and use the permutation time between backends
+
+## Scheduling Process
+
+1. Upload ONE runtime and nnpackage to remote device
+ - Use `/tmp/ONE` folder on remote device
+1. Profile execution time of each backend on remote device
+1. Get profile result from remote device
+ - Profile result is saved at `./tools/stab/traces` on host
+1. Schedule a backend for each operation to get the fastest inference time
+   - Use the fastest backend for each operation
+1. Generate nnpackage with backend configuration (an example config is sketched below)
+ - Generated at `./tools/stab/nnpkg_sched`
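+
+The chosen backends are written into the nnpackage's `metadata/config.cfg` as runtime
+configuration variables. The snippet below is only a sketch (operation indices, the default
+backend, and thread counts depend on the profiled model); the keys are the ones emitted by
+`backend_scheduler.py`:
+
+```
+OP_BACKEND_MAP=3=ruy;7=xnnpack;
+BACKENDS=cpu;ruy;xnnpack
+RUY_THREADS=4
+XNNPACK_THREADS=4
+```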
+
+## Prerequisite
+
+- Install Python>=3. Tested on Python 3.6.9 and 3.7.5
+- Register SSH keys to use ssh commands without entering a password
+ ```bash
+ ssh-keygen -t rsa
+ ssh-copy-id -i ~/.ssh/id_rsa.pub remote_user@remote_ip
+ ```
+
+## Usage
+
+```
+Usage: python3 ./tools/stab/stab.py --nnpackage nnpackage_dir --ip <IP>
+Runs the nnpackage on a remote device and creates an nnpackage with scheduled backends
+
+required arguments:
+ --nnpackage NNPACKAGE
+ nnpackage folder to profile
+ --ip IP IP address of remote client
+
+optional arguments:
+ -h, --help show this help message and exit
+ -n NUM_THREADS, --num_threads NUM_THREADS
+ Number of threads used by one runtime
+ -u USER, --user USER User of remote client
+ -v, --verbose Print verbose message
+ --no-profile Disable profiling
+
+Examples:
+ python3 ./tools/stab/stab.py --nnpackage ../nnpkg_tst/inception --ip 1.1.1.1 => Profile on remote device 1.1.1.1 with current user
+  python3 ./tools/stab/stab.py --nnpackage ../nnpkg_tst/inception --ip 1.1.1.1 -n 4 => Profile on remote device 1.1.1.1 using 4 threads for ONE runtime
+ python3 ./tools/stab/stab.py --nnpackage ../nnpkg_tst/inception --ip 1.1.1.1 --user odroid => Profile on remote device 1.1.1.1 with user odroid
+```
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from op_list_parser import OpListParser
+from remote import RemoteSSH
+
+
+class BackendProfiler():
+ """
+    Run the ONE runtime on a remote device to create a TRACE file containing per-operation execution times
+
+ TODO : Support Android device profiling
+ """
+
+ def __init__(self, user, ip, nnpackage_dir, num_threads):
+ self.remote_ssh = RemoteSSH(user, ip, nnpackage_dir, num_threads)
+ self.backend_op_list = OpListParser().parse()
+ self.backend_list = ["cpu"]
+        self.backend_list.extend(self.backend_op_list)
+
+ def sync(self):
+ logging.info("Upload ONE runtime and nnpackage to remote device")
+ self.remote_ssh.sync_binary()
+
+ def profile(self):
+ for backend in self.backend_list:
+ logging.info(f"Profiling {backend} backend")
+ self.remote_ssh.profile_backend(backend, self.backend_op_list)
+ self.remote_ssh.sync_trace(backend)
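+
+
+# Typical driver flow (as wired up in stab.py):
+#   profiler = BackendProfiler(user, ip, nnpackage_dir, num_threads)
+#   profiler.sync()     # upload the ONE runtime and the nnpackage
+#   profiler.profile()  # profile each backend and pull its trace file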
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json, logging
+from pathlib import Path
+from op_list_parser import OpListParser
+from nnpkg_helper import NnpkgHelper
+
+
+class BackendScheduler:
+ """
+    Read profiled data and select a proper backend for each operation
+    The scheduled nnpackage is saved at ./tools/stab/nnpkg_sched
+
+ TODO : Use permutation time for better scheduling
+ """
+
+ def __init__(self, nnpkg_dir, num_threads):
+ self.nnpkg_dir = Path(nnpkg_dir).resolve()
+ self.num_threads = num_threads
+ self.root_path = Path(__file__).parents[2]
+ self.nnpkg_helper = NnpkgHelper()
+
+ def read_traces(self, backend_list):
+ op_time = {}
+ inference_time = {}
+ for backend in backend_list:
+ try:
+ # Trace file is located at ./tools/stab/traces
+ trace_path = Path(
+ __file__
+ ).parent / 'traces' / f"{self.nnpkg_dir.name}_{backend}_{self.num_threads}"
+ logging.debug(f"Trace path : {trace_path}")
+ with open(trace_path) as f:
+ data = json.load(f)
+ execution_data = data['Execution_Data']
+ for entry in execution_data:
+ if entry == "memory":
+ continue
+ elif entry == "runtime":
+ inference_time[backend] = execution_data['runtime']['Graph'][
+ 'Avg_Time']
+ continue
+ op_backend = entry
+ backend_data = execution_data[op_backend]
+ for op in backend_data:
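+                            # Assumption: the trace key's third whitespace-separated token is
+                            # '$<op index>' and its last token is the operation type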
+ op_index = int(op.split(' ')[2][1:])
+ op_type = op.split(' ')[-1]
+ time = int(backend_data[op]["Avg_Time"])
+ if op_index not in op_time.keys():
+ op_time[op_index] = {op_backend: time}
+ op_time[op_index].update({"type": op_type})
+ else:
+ op_time[op_index].update({op_backend: time})
+ except IOError as e:
+ logging.warning(e)
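+        # op_time:        {op_index: {"type": op_type, <backend>: avg_time, ...}}
+        # inference_time: {<backend>: whole-graph Avg_Time from the trace}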
+ return op_time, inference_time
+
+ def schedule(self):
+ backend_op_list = OpListParser().parse()
+ backend_list = ["cpu"]
+        backend_list.extend(backend_op_list)
+
+ op_time, backend_infer_time = self.read_traces(backend_list)
+
+ backend_mapping = {}
+
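+        # Operations supported by at least one non-cpu backend are scheduling
+        # candidates; everything else is assumed to stay on cpu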
+ target_ops = set()
+ for _, v in backend_op_list.items():
+ target_ops.update(v)
+
+ # Find fastest backend for each operation
+ for op_index, value in sorted(op_time.items()):
+ op_type = value['type']
+ if op_type not in target_ops:
+ continue
+
+ logging.debug(f"----- Operation {op_index} -----")
+ op_infer_time = 0
+ for backend in backend_list:
+ if backend not in value:
+ continue
+ backend_time = value[backend]
+
+ logging.debug(f"{backend}[{backend_time}]")
+ if op_infer_time == 0 or backend_time < op_infer_time:
+ op_infer_time = backend_time
+ backend_mapping[op_index] = backend
+
+        # Pick the backend with the fastest whole-graph inference time as the default
+ default_backend = min(backend_infer_time, key=backend_infer_time.get)
+
+ # Create OP_BACKEND_MAP string
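+        # e.g. "3=ruy;7=xnnpack;" (operation indices here are illustrative)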
+ backend_conf = ""
+ for op_index, backend in sorted(backend_mapping.items()):
+ if backend != default_backend:
+ backend_conf += "{}={};".format(op_index, backend)
+
+        # Estimate whole-model inference time per backend and for the schedule
+ logging.info("-------- Expected inference time ---------")
+ inference_time = {}
+ for backend in backend_list:
+ inference_time[backend] = 0
+ for op_index, value in sorted(op_time.items()):
+ if backend in value:
+ inference_time[backend] += value[backend]
+ else:
+ inference_time[backend] += value["cpu"]
+
+ schedule_time = 0
+ for op_index, value in sorted(op_time.items()):
+ op_type = value['type']
+ if op_type not in target_ops:
+ schedule_time += value["cpu"]
+ continue
+ else:
+ op_backend = backend_mapping[op_index]
+ schedule_time += value[op_backend]
+ if (default_backend != op_backend):
+ logging.debug("[{}] {} -> {} : {:.2f} ms decrease".format(
+ op_index, default_backend, op_backend,
+ (value[default_backend] - value[op_backend]) / 1000))
+
+ for backend in backend_list:
+ logging.info(f"{backend} backend : {inference_time[backend]/1000:.2f} ms")
+ logging.info(f"Backend scheduling : {schedule_time / 1000:.2f} ms")
+
+ logging.info("-------- Backend Scheduling --------")
+ cmd = []
+ cmd += [f"OP_BACKEND_MAP={backend_conf}"]
+ for target_backend, op_list in backend_op_list.items():
+ if default_backend == target_backend:
+ for op in op_list:
+ cmd += [f"OP_BACKEND_{op}={default_backend}"]
+ cmd += [f"BACKENDS={';'.join(backend_list)}"]
+ cmd += [f"RUY_THREADS={self.num_threads}"]
+ cmd += [f"XNNPACK_THREADS={self.num_threads}"]
+ logging.info(' '.join(cmd))
+
+ # Create nnpackage with backend mapping
+ dst_dir = Path(__file__).parent / 'nnpkg_sched' / self.nnpkg_dir.name
+ self.nnpkg_helper.copy(self.nnpkg_dir, dst_dir)
+ self.nnpkg_helper.add_config(dst_dir, cmd)
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json, logging
+from distutils.dir_util import copy_tree
+from pathlib import Path
+
+
+class NnpkgHelper:
+ """
+ Helper class for nnpackage
+ """
+
+ def __init__(self):
+ self.config_name = 'config.cfg'
+
+ def copy(self, src, dst):
+ copy_tree(str(src), str(dst))
+
+ def add_config(self, src, configs):
+ manifest_path = Path(src).resolve() / 'metadata' / 'MANIFEST'
+ config_path = Path(src).resolve() / 'metadata' / self.config_name
+
+ try:
+ # Read MANIFEST file
+ with open(manifest_path, 'r') as manifest_file:
+ data = json.load(manifest_file)
+
+ # Add configs to MANIFEST file
+ with open(manifest_path, 'w') as manifest_file:
+ data['configs'] = [self.config_name]
+ json.dump(data, manifest_file, indent=2)
+
+ # Write config.cfg file
+ with open(config_path, 'w') as config_file:
+ config_file.write('\n'.join(configs))
+
+ logging.info(f"Scheduled nnpackage is saved at {src}")
+
+        except IOError as e:
+            logging.warning(e)
+        except Exception as e:
+            logging.warning(f"Failed to add config: {e}")
--- /dev/null
+ruy:Conv2D,FullyConnected
+xnnpack:Conv2D,DepthwiseConv2D,FullyConnected
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+
+
+class OpListParser():
+ """
+ Read op_list.txt to create supported operation list for each backend
+
+    TODO : Read the supported tensor type for each operation (FP32 or INT8)
+ """
+
+ def __init__(self):
+ self.file_name = "op_list.txt"
+ self.op_list_file = Path(__file__).parent / self.file_name
+
+ def parse(self):
+ backend_op_list = {}
+ with open(self.op_list_file, 'r') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.rstrip()
+ backend, _, op_list_str = line.partition(':')
+ op_list = op_list_str.split(',')
+ backend_op_list[backend] = op_list
+ return backend_op_list
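+
+
+# With the op_list.txt shipped next to this script, parse() returns:
+#   {'ruy': ['Conv2D', 'FullyConnected'],
+#    'xnnpack': ['Conv2D', 'DepthwiseConv2D', 'FullyConnected']}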
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess, logging
+from pathlib import Path
+
+
+class RemoteSSH():
+ """
+    Execute commands on a remote device using SSH
+
+    TODO : Use an SSH library instead of direct ssh calls
+ """
+
+ def __init__(self, user, ip, nnpkg_dir, num_threads):
+ self.base_dir = Path('/tmp/ONE')
+ self.trace_dir = 'traces'
+        self.host = f"{user}@{ip}" if user is not None else ip
+ self.nnpkg_dir = Path(nnpkg_dir).resolve()
+ self.nnpkg_name = self.nnpkg_dir.name
+ self.root_path = Path(__file__).resolve().parents[2]
+ self.num_threads = num_threads
+
+ def sync_binary(self):
+ bin_dir = self.root_path / 'Product/armv7l-linux.release/out'
+        if not bin_dir.is_dir():
+            logging.warning(f"Build dir [{bin_dir}] does not exist")
+            exit()
+        elif not self.nnpkg_dir.is_dir():
+            logging.warning(f"nnpackage dir [{self.nnpkg_dir}] does not exist")
+            exit()
+ else:
+ # Create temporary folder
+ subprocess.call(
+ ["ssh", f"{self.host}", "mkdir", "-p", self.base_dir / self.trace_dir])
+            # Sync ONE runtime
+ subprocess.call([
+ "rsync", "-az", "--exclude", "test-suite.tar.gz", bin_dir,
+ self.remote(self.base_dir)
+ ])
+ # Sync target nnpackage
+ subprocess.call(["rsync", "-az", self.nnpkg_dir, self.remote(self.base_dir)])
+
+ def sync_trace(self, backend):
+ remote_trace_path = self.remote_trace_path(backend)
+ local_trace_path = self.local_trace_path(backend)
+ local_trace_path.parent.mkdir(parents=True, exist_ok=True)
+ logging.debug(f"Remote trace path : {self.remote(remote_trace_path)}")
+ logging.debug(f"Local trace path : {local_trace_path}")
+ # Sync trace file
+ subprocess.call(
+ ["rsync", "-az",
+ self.remote(remote_trace_path), local_trace_path])
+
+ def profile_backend(self, backend, backend_op_list):
+ nnpkg_run_path = self.base_dir / 'out/bin/nnpackage_run'
+ nnpkg_path = self.base_dir / self.nnpkg_dir.name
+
+ cmd = ["ssh", f"{self.host}"]
+ cmd += [f"TRACE_FILEPATH={self.remote_trace_path(backend)}"]
+ for target_backend, op_list in backend_op_list.items():
+ if backend == target_backend:
+ for op in op_list:
+ cmd += [f"OP_BACKEND_{op}={backend}"]
+ cmd += [f"XNNPACK_THREADS={self.num_threads}"]
+ cmd += [f"RUY_THREADS={self.num_threads}"]
+ cmd += [f"BACKENDS=\'{';'.join(['cpu', backend])}\'"]
+ cmd += [f"{nnpkg_run_path}"]
+ cmd += [f"--nnpackage"]
+ cmd += [f"{nnpkg_path}"]
+ cmd += [f"-w5 -r50"]
+ logging.debug(f"SSH command : {' '.join(cmd)}")
+ subprocess.call(cmd)
+
+    def base_path(self):
+        pass
+
+ def remote(self, path):
+ return f"{self.host}:{path}"
+
+ # TODO Create class for path generation
+ def trace_name(self, backend):
+ return f"{self.nnpkg_name}_{backend}_{self.num_threads}"
+
+ def remote_trace_path(self, backend):
+ return self.base_dir / self.trace_dir / self.trace_name(backend)
+
+ def local_trace_path(self, backend):
+ return Path(__file__).parent / self.trace_dir / self.trace_name(backend)
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse, logging, sys
+from backend_profiler import BackendProfiler
+from backend_scheduler import BackendScheduler
+
+
+def main(args):
+ if args.profile:
+ backend_profiler = BackendProfiler(args.user, args.ip, args.nnpackage,
+ args.num_threads)
+ backend_profiler.sync()
+ backend_profiler.profile()
+ backend_scheduler = BackendScheduler(args.nnpackage, args.num_threads)
+ backend_scheduler.schedule()
+
+
+if __name__ == "__main__":
+ arg_parser = argparse.ArgumentParser(add_help=False)
+ required = arg_parser.add_argument_group('required arguments')
+ optional = arg_parser.add_argument_group('optional arguments')
+
+ # Add back help
+ optional.add_argument(
+ '-h',
+ '--help',
+ action='help',
+ default=argparse.SUPPRESS,
+ help='show this help message and exit')
+ required.add_argument(
+ "--nnpackage", type=str, required=True, help="nnpackage folder to profile")
+ required.add_argument(
+ "--ip", type=str, required=True, help="IP address of remote client")
+ optional.add_argument(
+ "-n",
+ "--num_threads",
+ type=int,
+ default=1,
+ help="Number of threads used by one runtime")
+ optional.add_argument("-u", "--user", type=str, help="User of remote client")
+ optional.add_argument(
+ "-v",
+ "--verbose",
+ action='store_const',
+ dest="verbose_level",
+ default=logging.INFO,
+ const=logging.DEBUG,
+ help="Print verbose message")
+ optional.add_argument(
+ "--no-profile", dest='profile', action='store_false', help="Disable profiling")
+ optional.set_defaults(profile=True)
+ args = arg_parser.parse_args()
+
+ logging.basicConfig(
+ stream=sys.stdout,
+ level=args.verbose_level,
+ format="[%(levelname).5s] %(message)s")
+
+ main(args)
tflite.QuantizationParameters.QuantizationParametersAddZeroPoint(
new_builder, new_zeropoint)
+ quantized_dimension = selected_quantization.QuantizedDimension()
+ if quantized_dimension != 0:
+ tflite.QuantizationParameters.QuantizationParametersAddQuantizedDimension(
+ new_builder, quantized_dimension)
+
return tflite.QuantizationParameters.QuantizationParametersEnd(new_builder)